From 3e121542d8b7ab5201c47bbd3ba5611a23c54759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Parm=C3=A9nides=20GV?= Date: Wed, 11 Jun 2014 11:56:59 +0200 Subject: Correctly connects to millipede. Location keyword on android.cfg isn't supported, EIP corresponding code has been commented out. I think we should support it in ics-openvpn, so that we can show the location instead of the server name. I've updated all opensssl, openvpn, etc. subprojects from rev 813 of ics-openvpn, and jni too. --- app/openssl/Android.mk | 16 +- app/openssl/Apps-config-host.mk | 119 + app/openssl/Apps-config-target.mk | 124 + app/openssl/Apps.mk | 22 + app/openssl/CleanSpec.mk | 1 + app/openssl/Crypto-config-host.mk | 698 +++++ app/openssl/Crypto-config-target.mk | 703 +++++ app/openssl/Crypto-config-trusty.mk | 262 ++ app/openssl/Crypto.mk | 76 + app/openssl/README.android | 35 +- app/openssl/Ssl-config-host.mk | 113 + app/openssl/Ssl-config-target.mk | 118 + app/openssl/Ssl.mk | 66 + app/openssl/ThirdPartyProject.prop | 10 - app/openssl/android-config.mk | 47 +- app/openssl/android.testssl/CAss.cnf | 2 +- app/openssl/android.testssl/Uss.cnf | 4 +- app/openssl/android.testssl/server2.pem | 422 +-- app/openssl/android.testssl/testssl | 44 +- app/openssl/apps/#Android.mk# | 78 - app/openssl/apps/Android.mk | 78 - app/openssl/apps/apps.c | 131 +- app/openssl/apps/apps.h | 17 +- app/openssl/apps/ca.c | 67 +- app/openssl/apps/ciphers.c | 2 +- app/openssl/apps/client.pem | 68 +- app/openssl/apps/cms.c | 41 +- app/openssl/apps/crl.c | 18 + app/openssl/apps/dgst.c | 16 +- app/openssl/apps/dhparam.c | 1 - app/openssl/apps/dsaparam.c | 9 +- app/openssl/apps/ecparam.c | 4 +- app/openssl/apps/enc.c | 8 + app/openssl/apps/genrsa.c | 2 +- app/openssl/apps/md4.c | 128 +- app/openssl/apps/ocsp.c | 2 +- app/openssl/apps/openssl.c | 17 + app/openssl/apps/openssl.cnf | 2 +- app/openssl/apps/pkcs12.c | 9 +- app/openssl/apps/progs.h | 4 + app/openssl/apps/progs.pl | 2 + app/openssl/apps/req.c | 99 +- app/openssl/apps/s_cb.c | 78 +- app/openssl/apps/s_client.c | 399 ++- app/openssl/apps/s_server.c | 349 ++- app/openssl/apps/s_socket.c | 7 +- app/openssl/apps/server.pem | 415 +-- app/openssl/apps/server2.pem | 422 +-- app/openssl/apps/sess_id.c | 18 +- app/openssl/apps/speed.c | 75 +- app/openssl/apps/srp.c | 756 +++++ app/openssl/apps/verify.c | 28 +- app/openssl/apps/x509.c | 38 +- app/openssl/build-config-32.mk | 62 + app/openssl/build-config-64.mk | 62 + app/openssl/build-config-static-32.mk | 33 + app/openssl/build-config-static-64.mk | 33 + app/openssl/build-config-trusty.mk | 100 + app/openssl/check-all-builds.sh | 641 ++++ app/openssl/crypto/Android.mk | 559 ---- app/openssl/crypto/LPdir_win32.c | 30 + app/openssl/crypto/aes/aes.h | 5 + app/openssl/crypto/aes/aes_core.c | 12 +- app/openssl/crypto/aes/aes_misc.c | 21 + app/openssl/crypto/aes/asm/aes-586.S | 3239 ++++++++++++++++++++ app/openssl/crypto/aes/asm/aes-586.pl | 20 +- app/openssl/crypto/aes/asm/aes-armv4.S | 1 + app/openssl/crypto/aes/asm/aes-armv4.pl | 182 +- app/openssl/crypto/aes/asm/aes-armv4.s | 177 +- app/openssl/crypto/aes/asm/aes-mips.S | 1337 ++++++++ app/openssl/crypto/aes/asm/aes-mips.pl | 1611 ++++++++++ app/openssl/crypto/aes/asm/aes-parisc.pl | 1022 ++++++ app/openssl/crypto/aes/asm/aes-ppc.pl | 444 ++- app/openssl/crypto/aes/asm/aes-s390x.pl | 1054 ++++++- app/openssl/crypto/aes/asm/aes-sparcv9.pl | 3 +- app/openssl/crypto/aes/asm/aes-x86_64.S | 2541 +++++++++++++++ app/openssl/crypto/aes/asm/aes-x86_64.pl | 48 +- app/openssl/crypto/aes/asm/aesni-sha1-x86_64.S | 1396 +++++++++ app/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl | 1250 ++++++++ app/openssl/crypto/aes/asm/aesni-x86.S | 2143 +++++++++++++ app/openssl/crypto/aes/asm/aesni-x86.pl | 2189 +++++++++++++ app/openssl/crypto/aes/asm/aesni-x86_64.S | 2535 +++++++++++++++ app/openssl/crypto/aes/asm/aesni-x86_64.pl | 3069 +++++++++++++++++++ app/openssl/crypto/aes/asm/bsaes-armv7.S | 2544 +++++++++++++++ app/openssl/crypto/aes/asm/bsaes-armv7.pl | 2467 +++++++++++++++ app/openssl/crypto/aes/asm/bsaes-x86_64.S | 2498 +++++++++++++++ app/openssl/crypto/aes/asm/bsaes-x86_64.pl | 3108 +++++++++++++++++++ app/openssl/crypto/aes/asm/vpaes-x86.S | 661 ++++ app/openssl/crypto/aes/asm/vpaes-x86.pl | 903 ++++++ app/openssl/crypto/aes/asm/vpaes-x86_64.S | 828 +++++ app/openssl/crypto/aes/asm/vpaes-x86_64.pl | 1207 ++++++++ app/openssl/crypto/arm_arch.h | 51 + app/openssl/crypto/armcap.c | 80 + app/openssl/crypto/armv4cpuid.S | 154 + app/openssl/crypto/asn1/a_d2i_fp.c | 54 +- app/openssl/crypto/asn1/a_digest.c | 6 +- app/openssl/crypto/asn1/a_int.c | 6 +- app/openssl/crypto/asn1/a_sign.c | 111 +- app/openssl/crypto/asn1/a_strex.c | 1 + app/openssl/crypto/asn1/a_verify.c | 83 +- app/openssl/crypto/asn1/ameth_lib.c | 12 +- app/openssl/crypto/asn1/asn1.h | 8 +- app/openssl/crypto/asn1/asn1_err.c | 7 +- app/openssl/crypto/asn1/asn1_locl.h | 11 + app/openssl/crypto/asn1/asn_mime.c | 23 +- app/openssl/crypto/asn1/n_pkey.c | 38 +- app/openssl/crypto/asn1/p5_pbev2.c | 143 +- app/openssl/crypto/asn1/t_crl.c | 3 +- app/openssl/crypto/asn1/t_x509.c | 55 +- app/openssl/crypto/asn1/tasn_prn.c | 12 +- app/openssl/crypto/asn1/x_algor.c | 14 + app/openssl/crypto/asn1/x_name.c | 3 +- app/openssl/crypto/asn1/x_pubkey.c | 14 +- app/openssl/crypto/bf/asm/bf-586.S | 896 ++++++ app/openssl/crypto/bf/bf_skey.c | 8 + app/openssl/crypto/bf/blowfish.h | 4 +- app/openssl/crypto/bio/b_sock.c | 13 +- app/openssl/crypto/bio/bf_buff.c | 15 +- app/openssl/crypto/bio/bio.h | 79 +- app/openssl/crypto/bio/bio_err.c | 3 +- app/openssl/crypto/bio/bio_lib.c | 28 +- app/openssl/crypto/bio/bss_bio.c | 18 +- app/openssl/crypto/bio/bss_dgram.c | 1073 ++++++- app/openssl/crypto/bio/bss_log.c | 2 +- app/openssl/crypto/bn/asm/armv4-gf2m.S | 213 ++ app/openssl/crypto/bn/asm/armv4-gf2m.pl | 278 ++ app/openssl/crypto/bn/asm/armv4-mont.S | 1 + app/openssl/crypto/bn/asm/armv4-mont.pl | 23 +- app/openssl/crypto/bn/asm/armv4-mont.s | 20 +- app/openssl/crypto/bn/asm/bn-586.S | 1533 +++++++++ app/openssl/crypto/bn/asm/bn-mips.S | 2175 +++++++++++++ app/openssl/crypto/bn/asm/co-586.S | 1254 ++++++++ app/openssl/crypto/bn/asm/ia64-mont.pl | 851 +++++ app/openssl/crypto/bn/asm/ia64.S | 2 +- app/openssl/crypto/bn/asm/mips-mont.S | 284 ++ app/openssl/crypto/bn/asm/mips-mont.pl | 426 +++ app/openssl/crypto/bn/asm/mips.pl | 2583 ++++++++++++++++ app/openssl/crypto/bn/asm/modexp512-x86_64.S | 1773 +++++++++++ app/openssl/crypto/bn/asm/modexp512-x86_64.pl | 1497 +++++++++ app/openssl/crypto/bn/asm/parisc-mont.pl | 995 ++++++ app/openssl/crypto/bn/asm/ppc-mont.pl | 107 +- app/openssl/crypto/bn/asm/ppc.pl | 45 +- app/openssl/crypto/bn/asm/ppc64-mont.pl | 338 +- app/openssl/crypto/bn/asm/s390x-gf2m.pl | 221 ++ app/openssl/crypto/bn/asm/s390x-mont.pl | 102 +- app/openssl/crypto/bn/asm/x86-gf2m.S | 347 +++ app/openssl/crypto/bn/asm/x86-gf2m.pl | 313 ++ app/openssl/crypto/bn/asm/x86-mont.S | 460 +++ app/openssl/crypto/bn/asm/x86-mont.pl | 4 +- app/openssl/crypto/bn/asm/x86_64-gcc.c | 2 +- app/openssl/crypto/bn/asm/x86_64-gf2m.S | 291 ++ app/openssl/crypto/bn/asm/x86_64-gf2m.pl | 390 +++ app/openssl/crypto/bn/asm/x86_64-mont.S | 1374 +++++++++ app/openssl/crypto/bn/asm/x86_64-mont.pl | 1489 ++++++++- app/openssl/crypto/bn/asm/x86_64-mont5.S | 784 +++++ app/openssl/crypto/bn/asm/x86_64-mont5.pl | 1071 +++++++ app/openssl/crypto/bn/bn.h | 32 + app/openssl/crypto/bn/bn_blind.c | 37 +- app/openssl/crypto/bn/bn_div.c | 274 +- app/openssl/crypto/bn/bn_err.c | 2 + app/openssl/crypto/bn/bn_exp.c | 240 +- app/openssl/crypto/bn/bn_gcd.c | 1 + app/openssl/crypto/bn/bn_gf2m.c | 114 +- app/openssl/crypto/bn/bn_lcl.h | 30 +- app/openssl/crypto/bn/bn_lib.c | 71 +- app/openssl/crypto/bn/bn_mont.c | 116 +- app/openssl/crypto/bn/bn_nist.c | 387 ++- app/openssl/crypto/bn/bn_print.c | 19 + app/openssl/crypto/bn/bn_rand.c | 70 + app/openssl/crypto/bn/bn_shift.c | 27 +- app/openssl/crypto/bn/bn_word.c | 25 +- app/openssl/crypto/bn/bntest.c | 8 +- app/openssl/crypto/buffer/buf_str.c | 119 + app/openssl/crypto/buffer/buffer.c | 79 +- app/openssl/crypto/buffer/buffer.h | 2 +- app/openssl/crypto/cmac/cm_ameth.c | 97 + app/openssl/crypto/cmac/cm_pmeth.c | 224 ++ app/openssl/crypto/cmac/cmac.c | 308 ++ app/openssl/crypto/cmac/cmac.h | 82 + app/openssl/crypto/cms/cms.h | 501 +++ app/openssl/crypto/cms/cms_asn1.c | 389 +++ app/openssl/crypto/cms/cms_att.c | 195 ++ app/openssl/crypto/cms/cms_cd.c | 136 + app/openssl/crypto/cms/cms_dd.c | 148 + app/openssl/crypto/cms/cms_enc.c | 294 ++ app/openssl/crypto/cms/cms_env.c | 876 ++++++ app/openssl/crypto/cms/cms_err.c | 245 ++ app/openssl/crypto/cms/cms_ess.c | 420 +++ app/openssl/crypto/cms/cms_io.c | 133 + app/openssl/crypto/cms/cms_lcl.h | 473 +++ app/openssl/crypto/cms/cms_lib.c | 622 ++++ app/openssl/crypto/cms/cms_pwri.c | 454 +++ app/openssl/crypto/cms/cms_sd.c | 985 ++++++ app/openssl/crypto/cms/cms_smime.c | 850 +++++ app/openssl/crypto/comp/c_rle.c | 4 +- app/openssl/crypto/conf/conf_mall.c | 1 + app/openssl/crypto/cpt_err.c | 4 +- app/openssl/crypto/cryptlib.c | 58 +- app/openssl/crypto/cryptlib.h | 4 +- app/openssl/crypto/crypto.h | 40 +- app/openssl/crypto/des/asm/crypt586.S | 879 ++++++ app/openssl/crypto/des/asm/des-586.S | 1837 +++++++++++ app/openssl/crypto/des/des.h | 3 + app/openssl/crypto/des/set_key.c | 8 + app/openssl/crypto/des/str2key.c | 2 +- app/openssl/crypto/dh/dh.h | 20 + app/openssl/crypto/dh/dh_ameth.c | 1 + app/openssl/crypto/dh/dh_err.c | 7 +- app/openssl/crypto/dh/dh_gen.c | 17 + app/openssl/crypto/dh/dh_key.c | 33 +- app/openssl/crypto/dh/dh_lib.c | 15 +- app/openssl/crypto/dsa/dsa.h | 30 +- app/openssl/crypto/dsa/dsa_ameth.c | 47 + app/openssl/crypto/dsa/dsa_asn1.c | 40 +- app/openssl/crypto/dsa/dsa_err.c | 8 +- app/openssl/crypto/dsa/dsa_gen.c | 35 +- app/openssl/crypto/dsa/dsa_key.c | 16 + app/openssl/crypto/dsa/dsa_lib.c | 22 +- app/openssl/crypto/dsa/dsa_locl.h | 1 + app/openssl/crypto/dsa/dsa_ossl.c | 44 +- app/openssl/crypto/dsa/dsa_pmeth.c | 6 +- app/openssl/crypto/dsa/dsa_sign.c | 57 +- app/openssl/crypto/dsa/dsa_vrf.c | 29 +- app/openssl/crypto/dso/dso_dlfcn.c | 3 +- app/openssl/crypto/dso/dso_lib.c | 8 + app/openssl/crypto/ec/ec.h | 108 +- app/openssl/crypto/ec/ec2_mult.c | 31 +- app/openssl/crypto/ec/ec2_oct.c | 407 +++ app/openssl/crypto/ec/ec2_smpl.c | 353 +-- app/openssl/crypto/ec/ec_ameth.c | 3 +- app/openssl/crypto/ec/ec_asn1.c | 30 +- app/openssl/crypto/ec/ec_curve.c | 197 +- app/openssl/crypto/ec/ec_cvt.c | 28 +- app/openssl/crypto/ec/ec_err.c | 20 +- app/openssl/crypto/ec/ec_key.c | 127 +- app/openssl/crypto/ec/ec_lcl.h | 56 +- app/openssl/crypto/ec/ec_lib.c | 90 +- app/openssl/crypto/ec/ec_oct.c | 199 ++ app/openssl/crypto/ec/ec_pmeth.c | 3 +- app/openssl/crypto/ec/eck_prn.c | 3 +- app/openssl/crypto/ec/ecp_mont.c | 13 +- app/openssl/crypto/ec/ecp_nist.c | 13 +- app/openssl/crypto/ec/ecp_oct.c | 433 +++ app/openssl/crypto/ec/ecp_smpl.c | 379 +-- app/openssl/crypto/ec/ectest.c | 343 ++- app/openssl/crypto/ecdh/ecdh.h | 2 + app/openssl/crypto/ecdh/ecdhtest.c | 6 + app/openssl/crypto/ecdh/ech_err.c | 4 +- app/openssl/crypto/ecdh/ech_key.c | 3 - app/openssl/crypto/ecdh/ech_lib.c | 31 +- app/openssl/crypto/ecdh/ech_locl.h | 8 + app/openssl/crypto/ecdh/ech_ossl.c | 2 + app/openssl/crypto/ecdsa/ecdsa.h | 3 + app/openssl/crypto/ecdsa/ecdsatest.c | 89 +- app/openssl/crypto/ecdsa/ecs_err.c | 5 +- app/openssl/crypto/ecdsa/ecs_lib.c | 32 +- app/openssl/crypto/ecdsa/ecs_locl.h | 13 +- app/openssl/crypto/ecdsa/ecs_ossl.c | 43 +- app/openssl/crypto/ecdsa/ecs_sign.c | 10 +- app/openssl/crypto/engine/eng_all.c | 9 + app/openssl/crypto/engine/eng_cryptodev.c | 71 +- app/openssl/crypto/engine/eng_dyn.c | 5 +- app/openssl/crypto/engine/eng_fat.c | 3 +- app/openssl/crypto/engine/eng_list.c | 1 + app/openssl/crypto/engine/engine.h | 9 + app/openssl/crypto/engine/tb_asnmth.c | 246 ++ app/openssl/crypto/engine/tb_pkmeth.c | 167 + app/openssl/crypto/err/err.c | 13 +- app/openssl/crypto/err/err.h | 3 +- app/openssl/crypto/err/err_all.c | 12 +- app/openssl/crypto/evp/bio_b64.c | 2 +- app/openssl/crypto/evp/bio_md.c | 11 +- app/openssl/crypto/evp/bio_ok.c | 103 +- app/openssl/crypto/evp/c_allc.c | 18 +- app/openssl/crypto/evp/digest.c | 34 +- app/openssl/crypto/evp/e_aes.c | 1290 +++++++- app/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c | 581 ++++ app/openssl/crypto/evp/e_des3.c | 9 +- app/openssl/crypto/evp/e_null.c | 4 +- app/openssl/crypto/evp/e_rc2.c | 3 +- app/openssl/crypto/evp/e_rc4.c | 1 + app/openssl/crypto/evp/e_rc4_hmac_md5.c | 298 ++ app/openssl/crypto/evp/evp.h | 102 +- app/openssl/crypto/evp/evp_cnf.c | 125 + app/openssl/crypto/evp/evp_enc.c | 95 +- app/openssl/crypto/evp/evp_err.c | 26 +- app/openssl/crypto/evp/evp_key.c | 27 +- app/openssl/crypto/evp/evp_lib.c | 4 + app/openssl/crypto/evp/evp_locl.h | 40 + app/openssl/crypto/evp/evp_pbe.c | 5 + app/openssl/crypto/evp/evptests.txt | 13 + app/openssl/crypto/evp/m_dss.c | 4 +- app/openssl/crypto/evp/m_dss1.c | 5 +- app/openssl/crypto/evp/m_ecdsa.c | 3 + app/openssl/crypto/evp/m_md4.c | 2 + app/openssl/crypto/evp/m_md5.c | 1 + app/openssl/crypto/evp/m_mdc2.c | 2 + app/openssl/crypto/evp/m_ripemd.c | 1 + app/openssl/crypto/evp/m_sha1.c | 7 +- app/openssl/crypto/evp/m_wp.c | 1 + app/openssl/crypto/evp/names.c | 5 + app/openssl/crypto/evp/p5_crpt.c | 33 +- app/openssl/crypto/evp/p5_crpt2.c | 121 +- app/openssl/crypto/evp/p_lib.c | 6 + app/openssl/crypto/evp/p_open.c | 3 +- app/openssl/crypto/evp/p_seal.c | 3 +- app/openssl/crypto/evp/p_sign.c | 10 +- app/openssl/crypto/evp/p_verify.c | 10 +- app/openssl/crypto/evp/pmeth_gn.c | 5 +- app/openssl/crypto/evp/pmeth_lib.c | 55 +- app/openssl/crypto/hmac/hm_ameth.c | 2 +- app/openssl/crypto/hmac/hm_pmeth.c | 14 +- app/openssl/crypto/hmac/hmac.c | 37 + app/openssl/crypto/ia64cpuid.S | 2 +- app/openssl/crypto/md4/md4.h | 3 + app/openssl/crypto/md4/md4_dgst.c | 36 +- app/openssl/crypto/md4/md4_locl.h | 8 +- app/openssl/crypto/md5/asm/md5-586.S | 679 ++++ app/openssl/crypto/md5/asm/md5-x86_64.S | 668 ++++ app/openssl/crypto/md5/asm/md5-x86_64.pl | 3 +- app/openssl/crypto/md5/md5.h | 3 + app/openssl/crypto/md5/md5_dgst.c | 3 +- app/openssl/crypto/md5/md5_locl.h | 8 +- app/openssl/crypto/mdc2/mdc2.h | 3 + app/openssl/crypto/mdc2/mdc2dgst.c | 3 +- app/openssl/crypto/mem.c | 8 +- app/openssl/crypto/modes/asm/ghash-alpha.pl | 460 +++ app/openssl/crypto/modes/asm/ghash-armv4.S | 408 +++ app/openssl/crypto/modes/asm/ghash-armv4.pl | 429 +++ app/openssl/crypto/modes/asm/ghash-ia64.pl | 463 +++ app/openssl/crypto/modes/asm/ghash-parisc.pl | 731 +++++ app/openssl/crypto/modes/asm/ghash-s390x.pl | 262 ++ app/openssl/crypto/modes/asm/ghash-sparcv9.pl | 330 ++ app/openssl/crypto/modes/asm/ghash-x86.S | 1269 ++++++++ app/openssl/crypto/modes/asm/ghash-x86.pl | 1342 ++++++++ app/openssl/crypto/modes/asm/ghash-x86_64.S | 1026 +++++++ app/openssl/crypto/modes/asm/ghash-x86_64.pl | 806 +++++ app/openssl/crypto/modes/cbc128.c | 35 +- app/openssl/crypto/modes/ccm128.c | 441 +++ app/openssl/crypto/modes/cfb128.c | 11 +- app/openssl/crypto/modes/ctr128.c | 92 +- app/openssl/crypto/modes/gcm128.c | 1905 ++++++++++++ app/openssl/crypto/modes/modes_lcl.h | 128 + app/openssl/crypto/modes/ofb128.c | 11 +- app/openssl/crypto/modes/xts128.c | 187 ++ app/openssl/crypto/o_init.c | 82 + app/openssl/crypto/objects/o_names.c | 2 +- app/openssl/crypto/objects/obj_dat.h | 136 +- app/openssl/crypto/objects/obj_mac.h | 142 +- app/openssl/crypto/objects/obj_mac.num | 27 + app/openssl/crypto/objects/obj_xref.c | 9 +- app/openssl/crypto/objects/obj_xref.h | 2 + app/openssl/crypto/objects/obj_xref.txt | 4 + app/openssl/crypto/objects/objects.txt | 41 +- app/openssl/crypto/ocsp/ocsp.h | 7 + app/openssl/crypto/ocsp/ocsp_lib.c | 3 +- app/openssl/crypto/ocsp/ocsp_vfy.c | 10 +- app/openssl/crypto/opensslconf-32.h | 316 ++ app/openssl/crypto/opensslconf-64.h | 316 ++ app/openssl/crypto/opensslconf-static-32.h | 316 ++ app/openssl/crypto/opensslconf-static-64.h | 316 ++ app/openssl/crypto/opensslconf-static-trusty.h | 448 +++ app/openssl/crypto/opensslconf-static.h | 6 + app/openssl/crypto/opensslconf-trusty.h | 448 +++ app/openssl/crypto/opensslconf.h | 256 +- app/openssl/crypto/opensslv.h | 6 +- app/openssl/crypto/ossl_typ.h | 2 + app/openssl/crypto/pariscid.pl | 225 ++ app/openssl/crypto/pem/pem_all.c | 161 + app/openssl/crypto/pem/pem_info.c | 1 + app/openssl/crypto/pem/pem_lib.c | 27 +- app/openssl/crypto/pem/pem_seal.c | 6 +- app/openssl/crypto/pem/pvkfmt.c | 58 +- app/openssl/crypto/perlasm/cbc.pl | 2 +- app/openssl/crypto/perlasm/ppc-xlate.pl | 13 +- app/openssl/crypto/perlasm/x86_64-xlate.pl | 221 +- app/openssl/crypto/perlasm/x86asm.pl | 55 +- app/openssl/crypto/perlasm/x86gas.pl | 34 +- app/openssl/crypto/perlasm/x86masm.pl | 20 +- app/openssl/crypto/perlasm/x86nasm.pl | 15 +- app/openssl/crypto/pkcs12/p12_crt.c | 7 + app/openssl/crypto/pkcs12/p12_decr.c | 9 +- app/openssl/crypto/pkcs12/p12_key.c | 40 +- app/openssl/crypto/pkcs12/p12_kiss.c | 2 +- app/openssl/crypto/pkcs12/p12_mutl.c | 12 +- app/openssl/crypto/pkcs7/pk7_doit.c | 101 +- app/openssl/crypto/pkcs7/pk7_smime.c | 25 +- app/openssl/crypto/ppccpuid.pl | 48 +- app/openssl/crypto/rand/md_rand.c | 58 +- app/openssl/crypto/rand/rand.h | 10 + app/openssl/crypto/rand/rand_err.c | 7 +- app/openssl/crypto/rand/rand_lib.c | 130 + app/openssl/crypto/rand/rand_unix.c | 108 +- app/openssl/crypto/rand/rand_win.c | 2 +- app/openssl/crypto/rand/randfile.c | 4 +- app/openssl/crypto/rc2/rc2.h | 4 +- app/openssl/crypto/rc2/rc2_skey.c | 8 + app/openssl/crypto/rc4/asm/rc4-586.pl | 162 +- app/openssl/crypto/rc4/asm/rc4-md5-x86_64.S | 1259 ++++++++ app/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl | 632 ++++ app/openssl/crypto/rc4/asm/rc4-parisc.pl | 314 ++ app/openssl/crypto/rc4/asm/rc4-s390x.pl | 47 +- app/openssl/crypto/rc4/asm/rc4-x86_64.S | 615 ++++ app/openssl/crypto/rc4/asm/rc4-x86_64.pl | 295 +- app/openssl/crypto/rc4/rc4.h | 1 + app/openssl/crypto/rc4/rc4_skey.c | 36 +- app/openssl/crypto/rc4/rc4_utl.c | 62 + app/openssl/crypto/rc4/rc4test.c | 6 + app/openssl/crypto/ripemd/ripemd.h | 3 + app/openssl/crypto/ripemd/rmd_dgst.c | 33 +- app/openssl/crypto/ripemd/rmd_locl.h | 10 +- app/openssl/crypto/rsa/rsa.h | 81 +- app/openssl/crypto/rsa/rsa_ameth.c | 351 ++- app/openssl/crypto/rsa/rsa_asn1.c | 10 + app/openssl/crypto/rsa/rsa_chk.c | 6 + app/openssl/crypto/rsa/rsa_crpt.c | 257 ++ app/openssl/crypto/rsa/rsa_eay.c | 86 +- app/openssl/crypto/rsa/rsa_err.c | 21 +- app/openssl/crypto/rsa/rsa_gen.c | 15 + app/openssl/crypto/rsa/rsa_lib.c | 172 +- app/openssl/crypto/rsa/rsa_oaep.c | 8 +- app/openssl/crypto/rsa/rsa_pmeth.c | 156 +- app/openssl/crypto/rsa/rsa_pss.c | 81 +- app/openssl/crypto/rsa/rsa_sign.c | 33 + app/openssl/crypto/s390xcap.c | 12 +- app/openssl/crypto/s390xcpuid.S | 17 +- app/openssl/crypto/sha/asm/sha1-586.S | 2639 ++++++++++++++++ app/openssl/crypto/sha/asm/sha1-586.pl | 1107 ++++++- app/openssl/crypto/sha/asm/sha1-alpha.pl | 322 ++ app/openssl/crypto/sha/asm/sha1-armv4-large.S | 1 + app/openssl/crypto/sha/asm/sha1-armv4-large.pl | 41 +- app/openssl/crypto/sha/asm/sha1-armv4-large.s | 209 +- app/openssl/crypto/sha/asm/sha1-ia64.pl | 193 +- app/openssl/crypto/sha/asm/sha1-mips.S | 1664 ++++++++++ app/openssl/crypto/sha/asm/sha1-mips.pl | 354 +++ app/openssl/crypto/sha/asm/sha1-parisc.pl | 260 ++ app/openssl/crypto/sha/asm/sha1-ppc.pl | 83 +- app/openssl/crypto/sha/asm/sha1-s390x.pl | 50 +- app/openssl/crypto/sha/asm/sha1-sparcv9a.pl | 2 +- app/openssl/crypto/sha/asm/sha1-x86_64.S | 2486 +++++++++++++++ app/openssl/crypto/sha/asm/sha1-x86_64.pl | 1188 ++++++- app/openssl/crypto/sha/asm/sha256-586.S | 258 ++ app/openssl/crypto/sha/asm/sha256-586.pl | 54 +- app/openssl/crypto/sha/asm/sha256-armv4.S | 1 + app/openssl/crypto/sha/asm/sha256-armv4.pl | 55 +- app/openssl/crypto/sha/asm/sha256-armv4.s | 858 ++++-- app/openssl/crypto/sha/asm/sha256-mips.S | 1998 ++++++++++++ app/openssl/crypto/sha/asm/sha256-x86_64.S | 1778 +++++++++++ app/openssl/crypto/sha/asm/sha512-586.S | 836 +++++ app/openssl/crypto/sha/asm/sha512-586.pl | 18 +- app/openssl/crypto/sha/asm/sha512-armv4.S | 1 + app/openssl/crypto/sha/asm/sha512-armv4.pl | 357 ++- app/openssl/crypto/sha/asm/sha512-armv4.s | 1635 +++++++++- app/openssl/crypto/sha/asm/sha512-mips.pl | 455 +++ app/openssl/crypto/sha/asm/sha512-parisc.pl | 793 +++++ app/openssl/crypto/sha/asm/sha512-ppc.pl | 114 +- app/openssl/crypto/sha/asm/sha512-s390x.pl | 63 +- app/openssl/crypto/sha/asm/sha512-sparcv9.pl | 6 +- app/openssl/crypto/sha/asm/sha512-x86_64.S | 1802 +++++++++++ app/openssl/crypto/sha/asm/sha512-x86_64.pl | 89 +- app/openssl/crypto/sha/sha.h | 14 + app/openssl/crypto/sha/sha1_one.c | 2 +- app/openssl/crypto/sha/sha1dgst.c | 1 + app/openssl/crypto/sha/sha256.c | 10 +- app/openssl/crypto/sha/sha512.c | 63 +- app/openssl/crypto/sha/sha_dgst.c | 1 + app/openssl/crypto/sha/sha_locl.h | 46 +- app/openssl/crypto/sparccpuid.S | 6 +- app/openssl/crypto/sparcv9cap.c | 4 +- app/openssl/crypto/srp/srp.h | 172 ++ app/openssl/crypto/srp/srp_grps.h | 517 ++++ app/openssl/crypto/srp/srp_lcl.h | 83 + app/openssl/crypto/srp/srp_lib.c | 361 +++ app/openssl/crypto/srp/srp_vfy.c | 658 ++++ app/openssl/crypto/stack/safestack.h | 138 +- app/openssl/crypto/symhacks.h | 38 +- app/openssl/crypto/ui/ui.h | 2 +- app/openssl/crypto/ui/ui_openssl.c | 6 + app/openssl/crypto/x509/by_dir.c | 6 +- app/openssl/crypto/x509/x509.h | 11 + app/openssl/crypto/x509/x509_cmp.c | 34 +- app/openssl/crypto/x509/x509_lu.c | 2 +- app/openssl/crypto/x509/x509_vfy.c | 27 +- app/openssl/crypto/x509/x509type.c | 32 +- app/openssl/crypto/x509/x_all.c | 21 + app/openssl/crypto/x509v3/v3_addr.c | 125 +- app/openssl/crypto/x509v3/v3_asid.c | 63 +- app/openssl/crypto/x509v3/v3_pci.c | 2 +- app/openssl/crypto/x509v3/v3_pcia.c | 2 +- app/openssl/crypto/x509v3/v3_purp.c | 4 +- app/openssl/crypto/x509v3/v3_skey.c | 3 +- app/openssl/crypto/x509v3/v3_utl.c | 2 +- app/openssl/crypto/x86_64cpuid.S | 234 ++ app/openssl/crypto/x86_64cpuid.pl | 88 +- app/openssl/crypto/x86cpuid.S | 346 +++ app/openssl/crypto/x86cpuid.pl | 84 +- app/openssl/e_os.h | 19 +- app/openssl/e_os2.h | 28 +- app/openssl/import_openssl.sh | 452 ++- app/openssl/include/openssl/aes.h | 5 + app/openssl/include/openssl/asn1.h | 8 +- app/openssl/include/openssl/bio.h | 79 +- app/openssl/include/openssl/blowfish.h | 4 +- app/openssl/include/openssl/bn.h | 32 + app/openssl/include/openssl/buffer.h | 2 +- app/openssl/include/openssl/cmac.h | 82 + app/openssl/include/openssl/cms.h | 501 +++ app/openssl/include/openssl/crypto.h | 40 +- app/openssl/include/openssl/des.h | 3 + app/openssl/include/openssl/dh.h | 20 + app/openssl/include/openssl/dsa.h | 30 +- app/openssl/include/openssl/dtls1.h | 26 +- app/openssl/include/openssl/e_os2.h | 28 +- app/openssl/include/openssl/ec.h | 108 +- app/openssl/include/openssl/ecdh.h | 2 + app/openssl/include/openssl/ecdsa.h | 3 + app/openssl/include/openssl/engine.h | 9 + app/openssl/include/openssl/err.h | 3 +- app/openssl/include/openssl/evp.h | 102 +- app/openssl/include/openssl/kssl.h | 13 + app/openssl/include/openssl/md4.h | 3 + app/openssl/include/openssl/md5.h | 3 + app/openssl/include/openssl/modes.h | 76 + app/openssl/include/openssl/obj_mac.h | 142 +- app/openssl/include/openssl/ocsp.h | 7 + app/openssl/include/openssl/opensslconf-32.h | 316 ++ app/openssl/include/openssl/opensslconf-64.h | 316 ++ .../include/openssl/opensslconf-static-32.h | 316 ++ .../include/openssl/opensslconf-static-64.h | 316 ++ .../include/openssl/opensslconf-static-trusty.h | 448 +++ app/openssl/include/openssl/opensslconf-static.h | 6 + app/openssl/include/openssl/opensslconf-trusty.h | 448 +++ app/openssl/include/openssl/opensslconf.h | 256 +- app/openssl/include/openssl/opensslv.h | 6 +- app/openssl/include/openssl/ossl_typ.h | 2 + app/openssl/include/openssl/rand.h | 10 + app/openssl/include/openssl/rc2.h | 4 +- app/openssl/include/openssl/rc4.h | 1 + app/openssl/include/openssl/ripemd.h | 3 + app/openssl/include/openssl/rsa.h | 81 +- app/openssl/include/openssl/safestack.h | 138 +- app/openssl/include/openssl/sha.h | 14 + app/openssl/include/openssl/srp.h | 172 ++ app/openssl/include/openssl/srtp.h | 145 + app/openssl/include/openssl/ssl.h | 449 ++- app/openssl/include/openssl/ssl2.h | 4 + app/openssl/include/openssl/ssl3.h | 108 +- app/openssl/include/openssl/symhacks.h | 38 +- app/openssl/include/openssl/tls1.h | 224 +- app/openssl/include/openssl/ts.h | 3 - app/openssl/include/openssl/ui.h | 2 +- app/openssl/include/openssl/x509.h | 11 + app/openssl/ndk-build-clear.mk | 14 + app/openssl/ndk-build.mk | 3 + app/openssl/openssl.config | 961 +++++- app/openssl/openssl.trusty.config | 278 ++ app/openssl/openssl.version | 3 +- app/openssl/patches/README | 61 +- app/openssl/patches/apps_Android.mk | 87 - app/openssl/patches/crypto_Android.mk | 559 ---- app/openssl/patches/ssl_Android.mk | 98 - app/openssl/rules.mk | 40 + app/openssl/ssl/Android.mk | 78 - app/openssl/ssl/bio_ssl.c | 2 + app/openssl/ssl/d1_both.c | 224 +- app/openssl/ssl/d1_clnt.c | 229 +- app/openssl/ssl/d1_enc.c | 59 +- app/openssl/ssl/d1_lib.c | 64 +- app/openssl/ssl/d1_pkt.c | 340 +- app/openssl/ssl/d1_srtp.c | 494 +++ app/openssl/ssl/d1_srvr.c | 217 +- app/openssl/ssl/dtls1.h | 26 +- app/openssl/ssl/kssl.c | 16 + app/openssl/ssl/kssl.h | 13 + app/openssl/ssl/s23_clnt.c | 140 +- app/openssl/ssl/s23_meth.c | 4 + app/openssl/ssl/s23_srvr.c | 59 +- app/openssl/ssl/s2_clnt.c | 14 +- app/openssl/ssl/s2_pkt.c | 3 +- app/openssl/ssl/s2_srvr.c | 16 +- app/openssl/ssl/s3_both.c | 46 +- app/openssl/ssl/s3_cbc.c | 790 +++++ app/openssl/ssl/s3_clnt.c | 877 ++++-- app/openssl/ssl/s3_enc.c | 164 +- app/openssl/ssl/s3_lib.c | 1499 +++++++-- app/openssl/ssl/s3_pkt.c | 345 ++- app/openssl/ssl/s3_srvr.c | 1127 +++++-- app/openssl/ssl/srtp.h | 145 + app/openssl/ssl/ssl.h | 449 ++- app/openssl/ssl/ssl2.h | 4 + app/openssl/ssl/ssl3.h | 108 +- app/openssl/ssl/ssl_algs.c | 10 + app/openssl/ssl/ssl_asn1.c | 50 + app/openssl/ssl/ssl_cert.c | 1 - app/openssl/ssl/ssl_ciph.c | 139 +- app/openssl/ssl/ssl_err.c | 50 +- app/openssl/ssl/ssl_lib.c | 323 +- app/openssl/ssl/ssl_locl.h | 122 +- app/openssl/ssl/ssl_rsa.c | 16 +- app/openssl/ssl/ssl_sess.c | 172 +- app/openssl/ssl/ssl_txt.c | 8 + app/openssl/ssl/ssltest.c | 178 +- app/openssl/ssl/t1_clnt.c | 21 +- app/openssl/ssl/t1_enc.c | 479 ++- app/openssl/ssl/t1_lib.c | 1307 +++++++- app/openssl/ssl/t1_meth.c | 22 +- app/openssl/ssl/t1_srvr.c | 21 +- app/openssl/ssl/tls1.h | 224 +- app/openssl/ssl/tls_srp.c | 507 +++ 609 files changed, 144082 insertions(+), 10411 deletions(-) create mode 100644 app/openssl/Apps-config-host.mk create mode 100644 app/openssl/Apps-config-target.mk create mode 100644 app/openssl/Apps.mk create mode 100644 app/openssl/Crypto-config-host.mk create mode 100644 app/openssl/Crypto-config-target.mk create mode 100644 app/openssl/Crypto-config-trusty.mk create mode 100644 app/openssl/Crypto.mk create mode 100644 app/openssl/Ssl-config-host.mk create mode 100644 app/openssl/Ssl-config-target.mk create mode 100644 app/openssl/Ssl.mk delete mode 100644 app/openssl/ThirdPartyProject.prop delete mode 100644 app/openssl/apps/#Android.mk# delete mode 100644 app/openssl/apps/Android.mk mode change 100644 => 120000 app/openssl/apps/md4.c create mode 100644 app/openssl/apps/srp.c create mode 100644 app/openssl/build-config-32.mk create mode 100644 app/openssl/build-config-64.mk create mode 100644 app/openssl/build-config-static-32.mk create mode 100644 app/openssl/build-config-static-64.mk create mode 100644 app/openssl/build-config-trusty.mk create mode 100755 app/openssl/check-all-builds.sh delete mode 100644 app/openssl/crypto/Android.mk create mode 100644 app/openssl/crypto/LPdir_win32.c create mode 100644 app/openssl/crypto/aes/asm/aes-586.S mode change 100755 => 100644 app/openssl/crypto/aes/asm/aes-586.pl create mode 120000 app/openssl/crypto/aes/asm/aes-armv4.S create mode 100644 app/openssl/crypto/aes/asm/aes-mips.S create mode 100644 app/openssl/crypto/aes/asm/aes-mips.pl create mode 100644 app/openssl/crypto/aes/asm/aes-parisc.pl create mode 100644 app/openssl/crypto/aes/asm/aes-x86_64.S create mode 100644 app/openssl/crypto/aes/asm/aesni-sha1-x86_64.S create mode 100644 app/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl create mode 100644 app/openssl/crypto/aes/asm/aesni-x86.S create mode 100644 app/openssl/crypto/aes/asm/aesni-x86.pl create mode 100644 app/openssl/crypto/aes/asm/aesni-x86_64.S create mode 100644 app/openssl/crypto/aes/asm/aesni-x86_64.pl create mode 100644 app/openssl/crypto/aes/asm/bsaes-armv7.S create mode 100644 app/openssl/crypto/aes/asm/bsaes-armv7.pl create mode 100644 app/openssl/crypto/aes/asm/bsaes-x86_64.S create mode 100644 app/openssl/crypto/aes/asm/bsaes-x86_64.pl create mode 100644 app/openssl/crypto/aes/asm/vpaes-x86.S create mode 100644 app/openssl/crypto/aes/asm/vpaes-x86.pl create mode 100644 app/openssl/crypto/aes/asm/vpaes-x86_64.S create mode 100644 app/openssl/crypto/aes/asm/vpaes-x86_64.pl create mode 100644 app/openssl/crypto/arm_arch.h create mode 100644 app/openssl/crypto/armcap.c create mode 100644 app/openssl/crypto/armv4cpuid.S create mode 100644 app/openssl/crypto/bf/asm/bf-586.S create mode 100644 app/openssl/crypto/bn/asm/armv4-gf2m.S create mode 100644 app/openssl/crypto/bn/asm/armv4-gf2m.pl create mode 120000 app/openssl/crypto/bn/asm/armv4-mont.S create mode 100644 app/openssl/crypto/bn/asm/bn-586.S create mode 100644 app/openssl/crypto/bn/asm/bn-mips.S create mode 100644 app/openssl/crypto/bn/asm/co-586.S create mode 100644 app/openssl/crypto/bn/asm/ia64-mont.pl create mode 100644 app/openssl/crypto/bn/asm/mips-mont.S create mode 100644 app/openssl/crypto/bn/asm/mips-mont.pl create mode 100644 app/openssl/crypto/bn/asm/mips.pl create mode 100644 app/openssl/crypto/bn/asm/modexp512-x86_64.S create mode 100644 app/openssl/crypto/bn/asm/modexp512-x86_64.pl create mode 100644 app/openssl/crypto/bn/asm/parisc-mont.pl create mode 100644 app/openssl/crypto/bn/asm/s390x-gf2m.pl create mode 100644 app/openssl/crypto/bn/asm/x86-gf2m.S create mode 100644 app/openssl/crypto/bn/asm/x86-gf2m.pl create mode 100644 app/openssl/crypto/bn/asm/x86-mont.S create mode 100644 app/openssl/crypto/bn/asm/x86_64-gf2m.S create mode 100644 app/openssl/crypto/bn/asm/x86_64-gf2m.pl create mode 100644 app/openssl/crypto/bn/asm/x86_64-mont.S create mode 100644 app/openssl/crypto/bn/asm/x86_64-mont5.S create mode 100755 app/openssl/crypto/bn/asm/x86_64-mont5.pl create mode 100644 app/openssl/crypto/buffer/buf_str.c create mode 100644 app/openssl/crypto/cmac/cm_ameth.c create mode 100644 app/openssl/crypto/cmac/cm_pmeth.c create mode 100644 app/openssl/crypto/cmac/cmac.c create mode 100644 app/openssl/crypto/cmac/cmac.h create mode 100644 app/openssl/crypto/cms/cms.h create mode 100644 app/openssl/crypto/cms/cms_asn1.c create mode 100644 app/openssl/crypto/cms/cms_att.c create mode 100644 app/openssl/crypto/cms/cms_cd.c create mode 100644 app/openssl/crypto/cms/cms_dd.c create mode 100644 app/openssl/crypto/cms/cms_enc.c create mode 100644 app/openssl/crypto/cms/cms_env.c create mode 100644 app/openssl/crypto/cms/cms_err.c create mode 100644 app/openssl/crypto/cms/cms_ess.c create mode 100644 app/openssl/crypto/cms/cms_io.c create mode 100644 app/openssl/crypto/cms/cms_lcl.h create mode 100644 app/openssl/crypto/cms/cms_lib.c create mode 100644 app/openssl/crypto/cms/cms_pwri.c create mode 100644 app/openssl/crypto/cms/cms_sd.c create mode 100644 app/openssl/crypto/cms/cms_smime.c create mode 100644 app/openssl/crypto/des/asm/crypt586.S create mode 100644 app/openssl/crypto/des/asm/des-586.S create mode 100644 app/openssl/crypto/ec/ec2_oct.c create mode 100644 app/openssl/crypto/ec/ec_oct.c create mode 100644 app/openssl/crypto/ec/ecp_oct.c create mode 100644 app/openssl/crypto/engine/tb_asnmth.c create mode 100644 app/openssl/crypto/engine/tb_pkmeth.c create mode 100644 app/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c create mode 100644 app/openssl/crypto/evp/e_rc4_hmac_md5.c create mode 100644 app/openssl/crypto/evp/evp_cnf.c create mode 100644 app/openssl/crypto/md5/asm/md5-586.S create mode 100644 app/openssl/crypto/md5/asm/md5-x86_64.S create mode 100644 app/openssl/crypto/modes/asm/ghash-alpha.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-armv4.S create mode 100644 app/openssl/crypto/modes/asm/ghash-armv4.pl create mode 100755 app/openssl/crypto/modes/asm/ghash-ia64.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-parisc.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-s390x.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-sparcv9.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-x86.S create mode 100644 app/openssl/crypto/modes/asm/ghash-x86.pl create mode 100644 app/openssl/crypto/modes/asm/ghash-x86_64.S create mode 100644 app/openssl/crypto/modes/asm/ghash-x86_64.pl create mode 100644 app/openssl/crypto/modes/ccm128.c create mode 100644 app/openssl/crypto/modes/gcm128.c create mode 100644 app/openssl/crypto/modes/modes_lcl.h create mode 100644 app/openssl/crypto/modes/xts128.c create mode 100644 app/openssl/crypto/o_init.c create mode 100644 app/openssl/crypto/opensslconf-32.h create mode 100644 app/openssl/crypto/opensslconf-64.h create mode 100644 app/openssl/crypto/opensslconf-static-32.h create mode 100644 app/openssl/crypto/opensslconf-static-64.h create mode 100644 app/openssl/crypto/opensslconf-static-trusty.h create mode 100644 app/openssl/crypto/opensslconf-static.h create mode 100644 app/openssl/crypto/opensslconf-trusty.h create mode 100644 app/openssl/crypto/pariscid.pl create mode 100644 app/openssl/crypto/rc4/asm/rc4-md5-x86_64.S create mode 100644 app/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl create mode 100644 app/openssl/crypto/rc4/asm/rc4-parisc.pl create mode 100644 app/openssl/crypto/rc4/asm/rc4-x86_64.S mode change 100755 => 100644 app/openssl/crypto/rc4/asm/rc4-x86_64.pl create mode 100644 app/openssl/crypto/rc4/rc4_utl.c create mode 100644 app/openssl/crypto/rsa/rsa_crpt.c create mode 100644 app/openssl/crypto/sha/asm/sha1-586.S create mode 100644 app/openssl/crypto/sha/asm/sha1-alpha.pl create mode 120000 app/openssl/crypto/sha/asm/sha1-armv4-large.S create mode 100644 app/openssl/crypto/sha/asm/sha1-mips.S create mode 100644 app/openssl/crypto/sha/asm/sha1-mips.pl create mode 100644 app/openssl/crypto/sha/asm/sha1-parisc.pl create mode 100644 app/openssl/crypto/sha/asm/sha1-x86_64.S create mode 100644 app/openssl/crypto/sha/asm/sha256-586.S create mode 120000 app/openssl/crypto/sha/asm/sha256-armv4.S create mode 100644 app/openssl/crypto/sha/asm/sha256-mips.S create mode 100644 app/openssl/crypto/sha/asm/sha256-x86_64.S create mode 100644 app/openssl/crypto/sha/asm/sha512-586.S create mode 120000 app/openssl/crypto/sha/asm/sha512-armv4.S create mode 100644 app/openssl/crypto/sha/asm/sha512-mips.pl create mode 100755 app/openssl/crypto/sha/asm/sha512-parisc.pl create mode 100644 app/openssl/crypto/sha/asm/sha512-x86_64.S create mode 100644 app/openssl/crypto/srp/srp.h create mode 100644 app/openssl/crypto/srp/srp_grps.h create mode 100644 app/openssl/crypto/srp/srp_lcl.h create mode 100644 app/openssl/crypto/srp/srp_lib.c create mode 100644 app/openssl/crypto/srp/srp_vfy.c create mode 100644 app/openssl/crypto/x86_64cpuid.S create mode 100644 app/openssl/crypto/x86cpuid.S create mode 100644 app/openssl/include/openssl/cmac.h create mode 100644 app/openssl/include/openssl/cms.h create mode 100644 app/openssl/include/openssl/opensslconf-32.h create mode 100644 app/openssl/include/openssl/opensslconf-64.h create mode 100644 app/openssl/include/openssl/opensslconf-static-32.h create mode 100644 app/openssl/include/openssl/opensslconf-static-64.h create mode 100644 app/openssl/include/openssl/opensslconf-static-trusty.h create mode 100644 app/openssl/include/openssl/opensslconf-static.h create mode 100644 app/openssl/include/openssl/opensslconf-trusty.h create mode 100644 app/openssl/include/openssl/srp.h create mode 100644 app/openssl/include/openssl/srtp.h create mode 100644 app/openssl/ndk-build-clear.mk create mode 100644 app/openssl/ndk-build.mk create mode 100644 app/openssl/openssl.trusty.config delete mode 100644 app/openssl/patches/apps_Android.mk delete mode 100644 app/openssl/patches/crypto_Android.mk delete mode 100644 app/openssl/patches/ssl_Android.mk create mode 100644 app/openssl/rules.mk delete mode 100644 app/openssl/ssl/Android.mk create mode 100644 app/openssl/ssl/d1_srtp.c create mode 100644 app/openssl/ssl/s3_cbc.c create mode 100644 app/openssl/ssl/srtp.h create mode 100644 app/openssl/ssl/tls_srp.c (limited to 'app/openssl') diff --git a/app/openssl/Android.mk b/app/openssl/Android.mk index 95f93718..9753ac54 100644 --- a/app/openssl/Android.mk +++ b/app/openssl/Android.mk @@ -1,8 +1,14 @@ LOCAL_PATH := $(call my-dir) -subdirs := $(addprefix $(LOCAL_PATH)/,$(addsuffix /Android.mk, \ - crypto \ - ssl \ - )) +# Enable to be able to use ALOG* with #include "cutils/log.h" +#log_c_includes += system/core/include +#log_shared_libraries := liblog -include $(subdirs) +# These makefiles are here instead of being Android.mk files in the +# respective crypto, ssl, and apps directories so +# that import_openssl.sh import won't remove them. +include $(LOCAL_PATH)/build-config-64.mk +include $(LOCAL_PATH)/build-config-32.mk +include $(LOCAL_PATH)/Crypto.mk +include $(LOCAL_PATH)/Ssl.mk +#include $(LOCAL_PATH)/Apps.mk diff --git a/app/openssl/Apps-config-host.mk b/app/openssl/Apps-config-host.mk new file mode 100644 index 00000000..37dcb78b --- /dev/null +++ b/app/openssl/Apps-config-host.mk @@ -0,0 +1,119 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Apps-config-host.mk + +common_cflags := \ + -DMONOLITH \ + +common_src_files := \ + apps/app_rand.c \ + apps/apps.c \ + apps/asn1pars.c \ + apps/ca.c \ + apps/ciphers.c \ + apps/cms.c \ + apps/crl.c \ + apps/crl2p7.c \ + apps/dgst.c \ + apps/dh.c \ + apps/dhparam.c \ + apps/dsa.c \ + apps/dsaparam.c \ + apps/ec.c \ + apps/ecparam.c \ + apps/enc.c \ + apps/engine.c \ + apps/errstr.c \ + apps/gendh.c \ + apps/gendsa.c \ + apps/genpkey.c \ + apps/genrsa.c \ + apps/nseq.c \ + apps/ocsp.c \ + apps/openssl.c \ + apps/passwd.c \ + apps/pkcs12.c \ + apps/pkcs7.c \ + apps/pkcs8.c \ + apps/pkey.c \ + apps/pkeyparam.c \ + apps/pkeyutl.c \ + apps/prime.c \ + apps/rand.c \ + apps/req.c \ + apps/rsa.c \ + apps/rsautl.c \ + apps/s_cb.c \ + apps/s_client.c \ + apps/s_server.c \ + apps/s_socket.c \ + apps/s_time.c \ + apps/sess_id.c \ + apps/smime.c \ + apps/speed.c \ + apps/spkac.c \ + apps/srp.c \ + apps/verify.c \ + apps/version.c \ + apps/x509.c \ + +common_c_includes := \ + external/openssl/. \ + external/openssl/include \ + +arm_cflags := + +arm_src_files := + +arm_exclude_files := + +arm64_cflags := + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := + +x86_src_files := + +x86_exclude_files := + +x86_64_cflags := + +x86_64_src_files := + +x86_64_exclude_files := + +mips_cflags := + +mips_src_files := + +mips_exclude_files := + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes) + +ifeq ($(HOST_OS),linux) +LOCAL_CFLAGS_x86 += $(x86_cflags) +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files)) +else +$(warning Unknown host OS $(HOST_OS)) +LOCAL_SRC_FILES += $(common_src_files) +endif diff --git a/app/openssl/Apps-config-target.mk b/app/openssl/Apps-config-target.mk new file mode 100644 index 00000000..bccd250d --- /dev/null +++ b/app/openssl/Apps-config-target.mk @@ -0,0 +1,124 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Apps-config-target.mk + +common_cflags := \ + -DMONOLITH \ + +common_src_files := \ + apps/app_rand.c \ + apps/apps.c \ + apps/asn1pars.c \ + apps/ca.c \ + apps/ciphers.c \ + apps/cms.c \ + apps/crl.c \ + apps/crl2p7.c \ + apps/dgst.c \ + apps/dh.c \ + apps/dhparam.c \ + apps/dsa.c \ + apps/dsaparam.c \ + apps/ec.c \ + apps/ecparam.c \ + apps/enc.c \ + apps/engine.c \ + apps/errstr.c \ + apps/gendh.c \ + apps/gendsa.c \ + apps/genpkey.c \ + apps/genrsa.c \ + apps/nseq.c \ + apps/ocsp.c \ + apps/openssl.c \ + apps/passwd.c \ + apps/pkcs12.c \ + apps/pkcs7.c \ + apps/pkcs8.c \ + apps/pkey.c \ + apps/pkeyparam.c \ + apps/pkeyutl.c \ + apps/prime.c \ + apps/rand.c \ + apps/req.c \ + apps/rsa.c \ + apps/rsautl.c \ + apps/s_cb.c \ + apps/s_client.c \ + apps/s_server.c \ + apps/s_socket.c \ + apps/s_time.c \ + apps/sess_id.c \ + apps/smime.c \ + apps/speed.c \ + apps/spkac.c \ + apps/srp.c \ + apps/verify.c \ + apps/version.c \ + apps/x509.c \ + +common_c_includes := \ + external/openssl/. \ + external/openssl/include \ + +arm_cflags := + +arm_src_files := + +arm_exclude_files := + +arm64_cflags := + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := + +x86_src_files := + +x86_exclude_files := + +x86_64_cflags := + +x86_64_src_files := + +x86_64_exclude_files := + +mips_cflags := + +mips_src_files := + +mips_exclude_files := + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) + +LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files)) +LOCAL_CFLAGS_arm += $(arm_cflags) + +LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files)) +LOCAL_CFLAGS_arm64 += $(arm64_cflags) + +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86 += $(x86_cflags) + +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) + +LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files)) +LOCAL_CFLAGS_mips += $(mips_cflags) diff --git a/app/openssl/Apps.mk b/app/openssl/Apps.mk new file mode 100644 index 00000000..3fb94dbe --- /dev/null +++ b/app/openssl/Apps.mk @@ -0,0 +1,22 @@ +# Copyright 2006 The Android Open Source Project + +LOCAL_PATH:= $(call my-dir) + +include $(CLEAR_VARS) +LOCAL_MODULE:= openssl +LOCAL_CLANG := true +LOCAL_MODULE_TAGS := optional +LOCAL_SHARED_LIBRARIES := libssl libcrypto +include $(LOCAL_PATH)/Apps-config-target.mk +include $(LOCAL_PATH)/android-config.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Apps.mk +include $(BUILD_EXECUTABLE) + +include $(CLEAR_VARS) +LOCAL_MODULE:= openssl +LOCAL_MODULE_TAGS := optional +LOCAL_SHARED_LIBRARIES := libssl-host libcrypto-host +include $(LOCAL_PATH)/Apps-config-host.mk +include $(LOCAL_PATH)/android-config.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Apps.mk +include $(BUILD_HOST_EXECUTABLE) diff --git a/app/openssl/CleanSpec.mk b/app/openssl/CleanSpec.mk index 7549ef98..3c1f8503 100644 --- a/app/openssl/CleanSpec.mk +++ b/app/openssl/CleanSpec.mk @@ -54,6 +54,7 @@ $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libssl_interme $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libcrypto_intermediates) $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libssl_static_intermediates) $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libcrypto_static_intermediates) +$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/EXECUTABLES/*ssl*_intermediates $(PRODUCT_OUT)/obj/*/libssl_*intermediates $(PRODUCT_OUT)/obj/*/libcrypto_*intermediates) # ************************************************ # NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST diff --git a/app/openssl/Crypto-config-host.mk b/app/openssl/Crypto-config-host.mk new file mode 100644 index 00000000..a377fec4 --- /dev/null +++ b/app/openssl/Crypto-config-host.mk @@ -0,0 +1,698 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-host.mk + +common_cflags := \ + -DNO_WINDOWS_BRAINDEATH \ + +common_src_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_cfb.c \ + crypto/aes/aes_core.c \ + crypto/aes/aes_ctr.c \ + crypto/aes/aes_ecb.c \ + crypto/aes/aes_misc.c \ + crypto/aes/aes_ofb.c \ + crypto/aes/aes_wrap.c \ + crypto/asn1/a_bitstr.c \ + crypto/asn1/a_bool.c \ + crypto/asn1/a_bytes.c \ + crypto/asn1/a_d2i_fp.c \ + crypto/asn1/a_digest.c \ + crypto/asn1/a_dup.c \ + crypto/asn1/a_enum.c \ + crypto/asn1/a_gentm.c \ + crypto/asn1/a_i2d_fp.c \ + crypto/asn1/a_int.c \ + crypto/asn1/a_mbstr.c \ + crypto/asn1/a_object.c \ + crypto/asn1/a_octet.c \ + crypto/asn1/a_print.c \ + crypto/asn1/a_set.c \ + crypto/asn1/a_sign.c \ + crypto/asn1/a_strex.c \ + crypto/asn1/a_strnid.c \ + crypto/asn1/a_time.c \ + crypto/asn1/a_type.c \ + crypto/asn1/a_utctm.c \ + crypto/asn1/a_utf8.c \ + crypto/asn1/a_verify.c \ + crypto/asn1/ameth_lib.c \ + crypto/asn1/asn1_err.c \ + crypto/asn1/asn1_gen.c \ + crypto/asn1/asn1_lib.c \ + crypto/asn1/asn1_par.c \ + crypto/asn1/asn_mime.c \ + crypto/asn1/asn_moid.c \ + crypto/asn1/asn_pack.c \ + crypto/asn1/bio_asn1.c \ + crypto/asn1/bio_ndef.c \ + crypto/asn1/d2i_pr.c \ + crypto/asn1/d2i_pu.c \ + crypto/asn1/evp_asn1.c \ + crypto/asn1/f_enum.c \ + crypto/asn1/f_int.c \ + crypto/asn1/f_string.c \ + crypto/asn1/i2d_pr.c \ + crypto/asn1/i2d_pu.c \ + crypto/asn1/n_pkey.c \ + crypto/asn1/nsseq.c \ + crypto/asn1/p5_pbe.c \ + crypto/asn1/p5_pbev2.c \ + crypto/asn1/p8_pkey.c \ + crypto/asn1/t_bitst.c \ + crypto/asn1/t_crl.c \ + crypto/asn1/t_pkey.c \ + crypto/asn1/t_req.c \ + crypto/asn1/t_spki.c \ + crypto/asn1/t_x509.c \ + crypto/asn1/t_x509a.c \ + crypto/asn1/tasn_dec.c \ + crypto/asn1/tasn_enc.c \ + crypto/asn1/tasn_fre.c \ + crypto/asn1/tasn_new.c \ + crypto/asn1/tasn_prn.c \ + crypto/asn1/tasn_typ.c \ + crypto/asn1/tasn_utl.c \ + crypto/asn1/x_algor.c \ + crypto/asn1/x_attrib.c \ + crypto/asn1/x_bignum.c \ + crypto/asn1/x_crl.c \ + crypto/asn1/x_exten.c \ + crypto/asn1/x_info.c \ + crypto/asn1/x_long.c \ + crypto/asn1/x_name.c \ + crypto/asn1/x_nx509.c \ + crypto/asn1/x_pkey.c \ + crypto/asn1/x_pubkey.c \ + crypto/asn1/x_req.c \ + crypto/asn1/x_sig.c \ + crypto/asn1/x_spki.c \ + crypto/asn1/x_val.c \ + crypto/asn1/x_x509.c \ + crypto/asn1/x_x509a.c \ + crypto/bf/bf_cfb64.c \ + crypto/bf/bf_ecb.c \ + crypto/bf/bf_enc.c \ + crypto/bf/bf_ofb64.c \ + crypto/bf/bf_skey.c \ + crypto/bio/b_dump.c \ + crypto/bio/b_print.c \ + crypto/bio/b_sock.c \ + crypto/bio/bf_buff.c \ + crypto/bio/bf_nbio.c \ + crypto/bio/bf_null.c \ + crypto/bio/bio_cb.c \ + crypto/bio/bio_err.c \ + crypto/bio/bio_lib.c \ + crypto/bio/bss_acpt.c \ + crypto/bio/bss_bio.c \ + crypto/bio/bss_conn.c \ + crypto/bio/bss_dgram.c \ + crypto/bio/bss_fd.c \ + crypto/bio/bss_file.c \ + crypto/bio/bss_log.c \ + crypto/bio/bss_mem.c \ + crypto/bio/bss_null.c \ + crypto/bio/bss_sock.c \ + crypto/bn/bn_add.c \ + crypto/bn/bn_asm.c \ + crypto/bn/bn_blind.c \ + crypto/bn/bn_const.c \ + crypto/bn/bn_ctx.c \ + crypto/bn/bn_div.c \ + crypto/bn/bn_err.c \ + crypto/bn/bn_exp.c \ + crypto/bn/bn_exp2.c \ + crypto/bn/bn_gcd.c \ + crypto/bn/bn_gf2m.c \ + crypto/bn/bn_kron.c \ + crypto/bn/bn_lib.c \ + crypto/bn/bn_mod.c \ + crypto/bn/bn_mont.c \ + crypto/bn/bn_mpi.c \ + crypto/bn/bn_mul.c \ + crypto/bn/bn_nist.c \ + crypto/bn/bn_prime.c \ + crypto/bn/bn_print.c \ + crypto/bn/bn_rand.c \ + crypto/bn/bn_recp.c \ + crypto/bn/bn_shift.c \ + crypto/bn/bn_sqr.c \ + crypto/bn/bn_sqrt.c \ + crypto/bn/bn_word.c \ + crypto/buffer/buf_err.c \ + crypto/buffer/buf_str.c \ + crypto/buffer/buffer.c \ + crypto/cmac/cm_ameth.c \ + crypto/cmac/cm_pmeth.c \ + crypto/cmac/cmac.c \ + crypto/cms/cms_asn1.c \ + crypto/cms/cms_att.c \ + crypto/cms/cms_cd.c \ + crypto/cms/cms_dd.c \ + crypto/cms/cms_enc.c \ + crypto/cms/cms_env.c \ + crypto/cms/cms_err.c \ + crypto/cms/cms_ess.c \ + crypto/cms/cms_io.c \ + crypto/cms/cms_lib.c \ + crypto/cms/cms_pwri.c \ + crypto/cms/cms_sd.c \ + crypto/cms/cms_smime.c \ + crypto/comp/c_rle.c \ + crypto/comp/c_zlib.c \ + crypto/comp/comp_err.c \ + crypto/comp/comp_lib.c \ + crypto/conf/conf_api.c \ + crypto/conf/conf_def.c \ + crypto/conf/conf_err.c \ + crypto/conf/conf_lib.c \ + crypto/conf/conf_mall.c \ + crypto/conf/conf_mod.c \ + crypto/conf/conf_sap.c \ + crypto/cpt_err.c \ + crypto/cryptlib.c \ + crypto/cversion.c \ + crypto/des/cbc_cksm.c \ + crypto/des/cbc_enc.c \ + crypto/des/cfb64ede.c \ + crypto/des/cfb64enc.c \ + crypto/des/cfb_enc.c \ + crypto/des/des_enc.c \ + crypto/des/des_old.c \ + crypto/des/des_old2.c \ + crypto/des/ecb3_enc.c \ + crypto/des/ecb_enc.c \ + crypto/des/ede_cbcm_enc.c \ + crypto/des/enc_read.c \ + crypto/des/enc_writ.c \ + crypto/des/fcrypt.c \ + crypto/des/fcrypt_b.c \ + crypto/des/ofb64ede.c \ + crypto/des/ofb64enc.c \ + crypto/des/ofb_enc.c \ + crypto/des/pcbc_enc.c \ + crypto/des/qud_cksm.c \ + crypto/des/rand_key.c \ + crypto/des/read2pwd.c \ + crypto/des/rpc_enc.c \ + crypto/des/set_key.c \ + crypto/des/str2key.c \ + crypto/des/xcbc_enc.c \ + crypto/dh/dh_ameth.c \ + crypto/dh/dh_asn1.c \ + crypto/dh/dh_check.c \ + crypto/dh/dh_depr.c \ + crypto/dh/dh_err.c \ + crypto/dh/dh_gen.c \ + crypto/dh/dh_key.c \ + crypto/dh/dh_lib.c \ + crypto/dh/dh_pmeth.c \ + crypto/dsa/dsa_ameth.c \ + crypto/dsa/dsa_asn1.c \ + crypto/dsa/dsa_depr.c \ + crypto/dsa/dsa_err.c \ + crypto/dsa/dsa_gen.c \ + crypto/dsa/dsa_key.c \ + crypto/dsa/dsa_lib.c \ + crypto/dsa/dsa_ossl.c \ + crypto/dsa/dsa_pmeth.c \ + crypto/dsa/dsa_prn.c \ + crypto/dsa/dsa_sign.c \ + crypto/dsa/dsa_vrf.c \ + crypto/dso/dso_dl.c \ + crypto/dso/dso_dlfcn.c \ + crypto/dso/dso_err.c \ + crypto/dso/dso_lib.c \ + crypto/dso/dso_null.c \ + crypto/dso/dso_openssl.c \ + crypto/ebcdic.c \ + crypto/ec/ec2_mult.c \ + crypto/ec/ec2_oct.c \ + crypto/ec/ec2_smpl.c \ + crypto/ec/ec_ameth.c \ + crypto/ec/ec_asn1.c \ + crypto/ec/ec_check.c \ + crypto/ec/ec_curve.c \ + crypto/ec/ec_cvt.c \ + crypto/ec/ec_err.c \ + crypto/ec/ec_key.c \ + crypto/ec/ec_lib.c \ + crypto/ec/ec_mult.c \ + crypto/ec/ec_oct.c \ + crypto/ec/ec_pmeth.c \ + crypto/ec/ec_print.c \ + crypto/ec/eck_prn.c \ + crypto/ec/ecp_mont.c \ + crypto/ec/ecp_nist.c \ + crypto/ec/ecp_oct.c \ + crypto/ec/ecp_smpl.c \ + crypto/ecdh/ech_err.c \ + crypto/ecdh/ech_key.c \ + crypto/ecdh/ech_lib.c \ + crypto/ecdh/ech_ossl.c \ + crypto/ecdsa/ecs_asn1.c \ + crypto/ecdsa/ecs_err.c \ + crypto/ecdsa/ecs_lib.c \ + crypto/ecdsa/ecs_ossl.c \ + crypto/ecdsa/ecs_sign.c \ + crypto/ecdsa/ecs_vrf.c \ + crypto/engine/eng_all.c \ + crypto/engine/eng_cnf.c \ + crypto/engine/eng_ctrl.c \ + crypto/engine/eng_dyn.c \ + crypto/engine/eng_err.c \ + crypto/engine/eng_fat.c \ + crypto/engine/eng_init.c \ + crypto/engine/eng_lib.c \ + crypto/engine/eng_list.c \ + crypto/engine/eng_pkey.c \ + crypto/engine/eng_table.c \ + crypto/engine/tb_asnmth.c \ + crypto/engine/tb_cipher.c \ + crypto/engine/tb_dh.c \ + crypto/engine/tb_digest.c \ + crypto/engine/tb_dsa.c \ + crypto/engine/tb_ecdh.c \ + crypto/engine/tb_ecdsa.c \ + crypto/engine/tb_pkmeth.c \ + crypto/engine/tb_rand.c \ + crypto/engine/tb_rsa.c \ + crypto/engine/tb_store.c \ + crypto/err/err.c \ + crypto/err/err_all.c \ + crypto/err/err_prn.c \ + crypto/evp/bio_b64.c \ + crypto/evp/bio_enc.c \ + crypto/evp/bio_md.c \ + crypto/evp/bio_ok.c \ + crypto/evp/c_all.c \ + crypto/evp/c_allc.c \ + crypto/evp/c_alld.c \ + crypto/evp/digest.c \ + crypto/evp/e_aes.c \ + crypto/evp/e_aes_cbc_hmac_sha1.c \ + crypto/evp/e_bf.c \ + crypto/evp/e_des.c \ + crypto/evp/e_des3.c \ + crypto/evp/e_null.c \ + crypto/evp/e_old.c \ + crypto/evp/e_rc2.c \ + crypto/evp/e_rc4.c \ + crypto/evp/e_rc4_hmac_md5.c \ + crypto/evp/e_rc5.c \ + crypto/evp/e_xcbc_d.c \ + crypto/evp/encode.c \ + crypto/evp/evp_acnf.c \ + crypto/evp/evp_cnf.c \ + crypto/evp/evp_enc.c \ + crypto/evp/evp_err.c \ + crypto/evp/evp_key.c \ + crypto/evp/evp_lib.c \ + crypto/evp/evp_pbe.c \ + crypto/evp/evp_pkey.c \ + crypto/evp/m_dss.c \ + crypto/evp/m_dss1.c \ + crypto/evp/m_ecdsa.c \ + crypto/evp/m_md4.c \ + crypto/evp/m_md5.c \ + crypto/evp/m_mdc2.c \ + crypto/evp/m_null.c \ + crypto/evp/m_ripemd.c \ + crypto/evp/m_sha1.c \ + crypto/evp/m_sigver.c \ + crypto/evp/m_wp.c \ + crypto/evp/names.c \ + crypto/evp/p5_crpt.c \ + crypto/evp/p5_crpt2.c \ + crypto/evp/p_dec.c \ + crypto/evp/p_enc.c \ + crypto/evp/p_lib.c \ + crypto/evp/p_open.c \ + crypto/evp/p_seal.c \ + crypto/evp/p_sign.c \ + crypto/evp/p_verify.c \ + crypto/evp/pmeth_fn.c \ + crypto/evp/pmeth_gn.c \ + crypto/evp/pmeth_lib.c \ + crypto/ex_data.c \ + crypto/hmac/hm_ameth.c \ + crypto/hmac/hm_pmeth.c \ + crypto/hmac/hmac.c \ + crypto/krb5/krb5_asn.c \ + crypto/lhash/lh_stats.c \ + crypto/lhash/lhash.c \ + crypto/md4/md4_dgst.c \ + crypto/md4/md4_one.c \ + crypto/md5/md5_dgst.c \ + crypto/md5/md5_one.c \ + crypto/mem.c \ + crypto/mem_clr.c \ + crypto/mem_dbg.c \ + crypto/modes/cbc128.c \ + crypto/modes/ccm128.c \ + crypto/modes/cfb128.c \ + crypto/modes/ctr128.c \ + crypto/modes/gcm128.c \ + crypto/modes/ofb128.c \ + crypto/modes/xts128.c \ + crypto/o_dir.c \ + crypto/o_init.c \ + crypto/o_str.c \ + crypto/o_time.c \ + crypto/objects/o_names.c \ + crypto/objects/obj_dat.c \ + crypto/objects/obj_err.c \ + crypto/objects/obj_lib.c \ + crypto/objects/obj_xref.c \ + crypto/ocsp/ocsp_asn.c \ + crypto/ocsp/ocsp_cl.c \ + crypto/ocsp/ocsp_err.c \ + crypto/ocsp/ocsp_ext.c \ + crypto/ocsp/ocsp_ht.c \ + crypto/ocsp/ocsp_lib.c \ + crypto/ocsp/ocsp_prn.c \ + crypto/ocsp/ocsp_srv.c \ + crypto/ocsp/ocsp_vfy.c \ + crypto/pem/pem_all.c \ + crypto/pem/pem_err.c \ + crypto/pem/pem_info.c \ + crypto/pem/pem_lib.c \ + crypto/pem/pem_oth.c \ + crypto/pem/pem_pk8.c \ + crypto/pem/pem_pkey.c \ + crypto/pem/pem_seal.c \ + crypto/pem/pem_sign.c \ + crypto/pem/pem_x509.c \ + crypto/pem/pem_xaux.c \ + crypto/pem/pvkfmt.c \ + crypto/pkcs12/p12_add.c \ + crypto/pkcs12/p12_asn.c \ + crypto/pkcs12/p12_attr.c \ + crypto/pkcs12/p12_crpt.c \ + crypto/pkcs12/p12_crt.c \ + crypto/pkcs12/p12_decr.c \ + crypto/pkcs12/p12_init.c \ + crypto/pkcs12/p12_key.c \ + crypto/pkcs12/p12_kiss.c \ + crypto/pkcs12/p12_mutl.c \ + crypto/pkcs12/p12_npas.c \ + crypto/pkcs12/p12_p8d.c \ + crypto/pkcs12/p12_p8e.c \ + crypto/pkcs12/p12_utl.c \ + crypto/pkcs12/pk12err.c \ + crypto/pkcs7/pk7_asn1.c \ + crypto/pkcs7/pk7_attr.c \ + crypto/pkcs7/pk7_doit.c \ + crypto/pkcs7/pk7_lib.c \ + crypto/pkcs7/pk7_mime.c \ + crypto/pkcs7/pk7_smime.c \ + crypto/pkcs7/pkcs7err.c \ + crypto/pqueue/pqueue.c \ + crypto/rand/md_rand.c \ + crypto/rand/rand_egd.c \ + crypto/rand/rand_err.c \ + crypto/rand/rand_lib.c \ + crypto/rand/rand_unix.c \ + crypto/rand/rand_win.c \ + crypto/rand/randfile.c \ + crypto/rc2/rc2_cbc.c \ + crypto/rc2/rc2_ecb.c \ + crypto/rc2/rc2_skey.c \ + crypto/rc2/rc2cfb64.c \ + crypto/rc2/rc2ofb64.c \ + crypto/rc4/rc4_enc.c \ + crypto/rc4/rc4_skey.c \ + crypto/rc4/rc4_utl.c \ + crypto/ripemd/rmd_dgst.c \ + crypto/ripemd/rmd_one.c \ + crypto/rsa/rsa_ameth.c \ + crypto/rsa/rsa_asn1.c \ + crypto/rsa/rsa_chk.c \ + crypto/rsa/rsa_crpt.c \ + crypto/rsa/rsa_eay.c \ + crypto/rsa/rsa_err.c \ + crypto/rsa/rsa_gen.c \ + crypto/rsa/rsa_lib.c \ + crypto/rsa/rsa_none.c \ + crypto/rsa/rsa_null.c \ + crypto/rsa/rsa_oaep.c \ + crypto/rsa/rsa_pk1.c \ + crypto/rsa/rsa_pmeth.c \ + crypto/rsa/rsa_prn.c \ + crypto/rsa/rsa_pss.c \ + crypto/rsa/rsa_saos.c \ + crypto/rsa/rsa_sign.c \ + crypto/rsa/rsa_ssl.c \ + crypto/rsa/rsa_x931.c \ + crypto/sha/sha1_one.c \ + crypto/sha/sha1dgst.c \ + crypto/sha/sha256.c \ + crypto/sha/sha512.c \ + crypto/sha/sha_dgst.c \ + crypto/srp/srp_lib.c \ + crypto/srp/srp_vfy.c \ + crypto/stack/stack.c \ + crypto/ts/ts_err.c \ + crypto/txt_db/txt_db.c \ + crypto/ui/ui_compat.c \ + crypto/ui/ui_err.c \ + crypto/ui/ui_lib.c \ + crypto/ui/ui_openssl.c \ + crypto/ui/ui_util.c \ + crypto/uid.c \ + crypto/x509/by_dir.c \ + crypto/x509/by_file.c \ + crypto/x509/x509_att.c \ + crypto/x509/x509_cmp.c \ + crypto/x509/x509_d2.c \ + crypto/x509/x509_def.c \ + crypto/x509/x509_err.c \ + crypto/x509/x509_ext.c \ + crypto/x509/x509_lu.c \ + crypto/x509/x509_obj.c \ + crypto/x509/x509_r2x.c \ + crypto/x509/x509_req.c \ + crypto/x509/x509_set.c \ + crypto/x509/x509_trs.c \ + crypto/x509/x509_txt.c \ + crypto/x509/x509_v3.c \ + crypto/x509/x509_vfy.c \ + crypto/x509/x509_vpm.c \ + crypto/x509/x509cset.c \ + crypto/x509/x509name.c \ + crypto/x509/x509rset.c \ + crypto/x509/x509spki.c \ + crypto/x509/x509type.c \ + crypto/x509/x_all.c \ + crypto/x509v3/pcy_cache.c \ + crypto/x509v3/pcy_data.c \ + crypto/x509v3/pcy_lib.c \ + crypto/x509v3/pcy_map.c \ + crypto/x509v3/pcy_node.c \ + crypto/x509v3/pcy_tree.c \ + crypto/x509v3/v3_akey.c \ + crypto/x509v3/v3_akeya.c \ + crypto/x509v3/v3_alt.c \ + crypto/x509v3/v3_bcons.c \ + crypto/x509v3/v3_bitst.c \ + crypto/x509v3/v3_conf.c \ + crypto/x509v3/v3_cpols.c \ + crypto/x509v3/v3_crld.c \ + crypto/x509v3/v3_enum.c \ + crypto/x509v3/v3_extku.c \ + crypto/x509v3/v3_genn.c \ + crypto/x509v3/v3_ia5.c \ + crypto/x509v3/v3_info.c \ + crypto/x509v3/v3_int.c \ + crypto/x509v3/v3_lib.c \ + crypto/x509v3/v3_ncons.c \ + crypto/x509v3/v3_ocsp.c \ + crypto/x509v3/v3_pci.c \ + crypto/x509v3/v3_pcia.c \ + crypto/x509v3/v3_pcons.c \ + crypto/x509v3/v3_pku.c \ + crypto/x509v3/v3_pmaps.c \ + crypto/x509v3/v3_prn.c \ + crypto/x509v3/v3_purp.c \ + crypto/x509v3/v3_skey.c \ + crypto/x509v3/v3_sxnet.c \ + crypto/x509v3/v3_utl.c \ + crypto/x509v3/v3err.c \ + +common_c_includes := \ + external/openssl/. \ + external/openssl/crypto \ + external/openssl/crypto/asn1 \ + external/openssl/crypto/evp \ + external/openssl/crypto/modes \ + external/openssl/include \ + external/openssl/include/openssl \ + +arm_cflags := \ + -DAES_ASM \ + -DBSAES_ASM \ + -DGHASH_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + +arm_src_files := \ + crypto/aes/asm/aes-armv4.S \ + crypto/aes/asm/bsaes-armv7.S \ + crypto/armcap.c \ + crypto/armv4cpuid.S \ + crypto/bn/asm/armv4-gf2m.S \ + crypto/bn/asm/armv4-mont.S \ + crypto/modes/asm/ghash-armv4.S \ + crypto/sha/asm/sha1-armv4-large.S \ + crypto/sha/asm/sha256-armv4.S \ + crypto/sha/asm/sha512-armv4.S \ + +arm_exclude_files := \ + crypto/aes/aes_core.c \ + crypto/mem_clr.c \ + +arm64_cflags := \ + -DOPENSSL_NO_ASM \ + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := \ + -DAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ + -DDES_UNROLL \ + -DGHASH_ASM \ + -DMD5_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_BN_ASM_PART_WORDS \ + -DOPENSSL_CPUID_OBJ \ + -DOPENSSL_IA32_SSE2 \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + -DVPAES_ASM \ + +x86_src_files := \ + crypto/aes/asm/aes-586.S \ + crypto/aes/asm/aesni-x86.S \ + crypto/aes/asm/vpaes-x86.S \ + crypto/bf/asm/bf-586.S \ + crypto/bn/asm/bn-586.S \ + crypto/bn/asm/co-586.S \ + crypto/bn/asm/x86-gf2m.S \ + crypto/bn/asm/x86-mont.S \ + crypto/des/asm/crypt586.S \ + crypto/des/asm/des-586.S \ + crypto/md5/asm/md5-586.S \ + crypto/modes/asm/ghash-x86.S \ + crypto/sha/asm/sha1-586.S \ + crypto/sha/asm/sha256-586.S \ + crypto/sha/asm/sha512-586.S \ + crypto/x86cpuid.S \ + +x86_exclude_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_core.c \ + crypto/bf/bf_enc.c \ + crypto/bn/bn_asm.c \ + crypto/des/des_enc.c \ + crypto/des/fcrypt_b.c \ + crypto/mem_clr.c \ + +x86_64_cflags := \ + -DAES_ASM \ + -DBSAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ + -DDES_UNROLL \ + -DGHASH_ASM \ + -DMD5_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_BN_ASM_MONT5 \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + -DVPAES_ASM \ + +x86_64_src_files := \ + crypto/aes/asm/aes-x86_64.S \ + crypto/aes/asm/aesni-sha1-x86_64.S \ + crypto/aes/asm/aesni-x86_64.S \ + crypto/aes/asm/bsaes-x86_64.S \ + crypto/aes/asm/vpaes-x86_64.S \ + crypto/bn/asm/modexp512-x86_64.S \ + crypto/bn/asm/x86_64-gcc.c \ + crypto/bn/asm/x86_64-gf2m.S \ + crypto/bn/asm/x86_64-mont.S \ + crypto/bn/asm/x86_64-mont5.S \ + crypto/md5/asm/md5-x86_64.S \ + crypto/modes/asm/ghash-x86_64.S \ + crypto/rc4/asm/rc4-md5-x86_64.S \ + crypto/rc4/asm/rc4-x86_64.S \ + crypto/sha/asm/sha1-x86_64.S \ + crypto/sha/asm/sha256-x86_64.S \ + crypto/sha/asm/sha512-x86_64.S \ + crypto/x86_64cpuid.S \ + +x86_64_exclude_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_core.c \ + crypto/bn/bn_asm.c \ + crypto/mem_clr.c \ + crypto/rc4/rc4_enc.c \ + crypto/rc4/rc4_skey.c \ + +mips_cflags := \ + -DAES_ASM \ + -DOPENSSL_BN_ASM_MONT \ + -DSHA1_ASM \ + -DSHA256_ASM \ + +mips_src_files := \ + crypto/aes/asm/aes-mips.S \ + crypto/bn/asm/bn-mips.S \ + crypto/bn/asm/mips-mont.S \ + crypto/sha/asm/sha1-mips.S \ + crypto/sha/asm/sha256-mips.S \ + +mips_exclude_files := \ + crypto/aes/aes_core.c \ + crypto/bn/bn_asm.c \ + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes) + +ifeq ($(HOST_OS),linux) +LOCAL_CFLAGS_x86 += $(x86_cflags) +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files)) +else +$(warning Unknown host OS $(HOST_OS)) +LOCAL_SRC_FILES += $(common_src_files) +endif diff --git a/app/openssl/Crypto-config-target.mk b/app/openssl/Crypto-config-target.mk new file mode 100644 index 00000000..2c5b01e5 --- /dev/null +++ b/app/openssl/Crypto-config-target.mk @@ -0,0 +1,703 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-target.mk + +common_cflags := \ + -DNO_WINDOWS_BRAINDEATH \ + +common_src_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_cfb.c \ + crypto/aes/aes_core.c \ + crypto/aes/aes_ctr.c \ + crypto/aes/aes_ecb.c \ + crypto/aes/aes_misc.c \ + crypto/aes/aes_ofb.c \ + crypto/aes/aes_wrap.c \ + crypto/asn1/a_bitstr.c \ + crypto/asn1/a_bool.c \ + crypto/asn1/a_bytes.c \ + crypto/asn1/a_d2i_fp.c \ + crypto/asn1/a_digest.c \ + crypto/asn1/a_dup.c \ + crypto/asn1/a_enum.c \ + crypto/asn1/a_gentm.c \ + crypto/asn1/a_i2d_fp.c \ + crypto/asn1/a_int.c \ + crypto/asn1/a_mbstr.c \ + crypto/asn1/a_object.c \ + crypto/asn1/a_octet.c \ + crypto/asn1/a_print.c \ + crypto/asn1/a_set.c \ + crypto/asn1/a_sign.c \ + crypto/asn1/a_strex.c \ + crypto/asn1/a_strnid.c \ + crypto/asn1/a_time.c \ + crypto/asn1/a_type.c \ + crypto/asn1/a_utctm.c \ + crypto/asn1/a_utf8.c \ + crypto/asn1/a_verify.c \ + crypto/asn1/ameth_lib.c \ + crypto/asn1/asn1_err.c \ + crypto/asn1/asn1_gen.c \ + crypto/asn1/asn1_lib.c \ + crypto/asn1/asn1_par.c \ + crypto/asn1/asn_mime.c \ + crypto/asn1/asn_moid.c \ + crypto/asn1/asn_pack.c \ + crypto/asn1/bio_asn1.c \ + crypto/asn1/bio_ndef.c \ + crypto/asn1/d2i_pr.c \ + crypto/asn1/d2i_pu.c \ + crypto/asn1/evp_asn1.c \ + crypto/asn1/f_enum.c \ + crypto/asn1/f_int.c \ + crypto/asn1/f_string.c \ + crypto/asn1/i2d_pr.c \ + crypto/asn1/i2d_pu.c \ + crypto/asn1/n_pkey.c \ + crypto/asn1/nsseq.c \ + crypto/asn1/p5_pbe.c \ + crypto/asn1/p5_pbev2.c \ + crypto/asn1/p8_pkey.c \ + crypto/asn1/t_bitst.c \ + crypto/asn1/t_crl.c \ + crypto/asn1/t_pkey.c \ + crypto/asn1/t_req.c \ + crypto/asn1/t_spki.c \ + crypto/asn1/t_x509.c \ + crypto/asn1/t_x509a.c \ + crypto/asn1/tasn_dec.c \ + crypto/asn1/tasn_enc.c \ + crypto/asn1/tasn_fre.c \ + crypto/asn1/tasn_new.c \ + crypto/asn1/tasn_prn.c \ + crypto/asn1/tasn_typ.c \ + crypto/asn1/tasn_utl.c \ + crypto/asn1/x_algor.c \ + crypto/asn1/x_attrib.c \ + crypto/asn1/x_bignum.c \ + crypto/asn1/x_crl.c \ + crypto/asn1/x_exten.c \ + crypto/asn1/x_info.c \ + crypto/asn1/x_long.c \ + crypto/asn1/x_name.c \ + crypto/asn1/x_nx509.c \ + crypto/asn1/x_pkey.c \ + crypto/asn1/x_pubkey.c \ + crypto/asn1/x_req.c \ + crypto/asn1/x_sig.c \ + crypto/asn1/x_spki.c \ + crypto/asn1/x_val.c \ + crypto/asn1/x_x509.c \ + crypto/asn1/x_x509a.c \ + crypto/bf/bf_cfb64.c \ + crypto/bf/bf_ecb.c \ + crypto/bf/bf_enc.c \ + crypto/bf/bf_ofb64.c \ + crypto/bf/bf_skey.c \ + crypto/bio/b_dump.c \ + crypto/bio/b_print.c \ + crypto/bio/b_sock.c \ + crypto/bio/bf_buff.c \ + crypto/bio/bf_nbio.c \ + crypto/bio/bf_null.c \ + crypto/bio/bio_cb.c \ + crypto/bio/bio_err.c \ + crypto/bio/bio_lib.c \ + crypto/bio/bss_acpt.c \ + crypto/bio/bss_bio.c \ + crypto/bio/bss_conn.c \ + crypto/bio/bss_dgram.c \ + crypto/bio/bss_fd.c \ + crypto/bio/bss_file.c \ + crypto/bio/bss_log.c \ + crypto/bio/bss_mem.c \ + crypto/bio/bss_null.c \ + crypto/bio/bss_sock.c \ + crypto/bn/bn_add.c \ + crypto/bn/bn_asm.c \ + crypto/bn/bn_blind.c \ + crypto/bn/bn_const.c \ + crypto/bn/bn_ctx.c \ + crypto/bn/bn_div.c \ + crypto/bn/bn_err.c \ + crypto/bn/bn_exp.c \ + crypto/bn/bn_exp2.c \ + crypto/bn/bn_gcd.c \ + crypto/bn/bn_gf2m.c \ + crypto/bn/bn_kron.c \ + crypto/bn/bn_lib.c \ + crypto/bn/bn_mod.c \ + crypto/bn/bn_mont.c \ + crypto/bn/bn_mpi.c \ + crypto/bn/bn_mul.c \ + crypto/bn/bn_nist.c \ + crypto/bn/bn_prime.c \ + crypto/bn/bn_print.c \ + crypto/bn/bn_rand.c \ + crypto/bn/bn_recp.c \ + crypto/bn/bn_shift.c \ + crypto/bn/bn_sqr.c \ + crypto/bn/bn_sqrt.c \ + crypto/bn/bn_word.c \ + crypto/buffer/buf_err.c \ + crypto/buffer/buf_str.c \ + crypto/buffer/buffer.c \ + crypto/cmac/cm_ameth.c \ + crypto/cmac/cm_pmeth.c \ + crypto/cmac/cmac.c \ + crypto/cms/cms_asn1.c \ + crypto/cms/cms_att.c \ + crypto/cms/cms_cd.c \ + crypto/cms/cms_dd.c \ + crypto/cms/cms_enc.c \ + crypto/cms/cms_env.c \ + crypto/cms/cms_err.c \ + crypto/cms/cms_ess.c \ + crypto/cms/cms_io.c \ + crypto/cms/cms_lib.c \ + crypto/cms/cms_pwri.c \ + crypto/cms/cms_sd.c \ + crypto/cms/cms_smime.c \ + crypto/comp/c_rle.c \ + crypto/comp/c_zlib.c \ + crypto/comp/comp_err.c \ + crypto/comp/comp_lib.c \ + crypto/conf/conf_api.c \ + crypto/conf/conf_def.c \ + crypto/conf/conf_err.c \ + crypto/conf/conf_lib.c \ + crypto/conf/conf_mall.c \ + crypto/conf/conf_mod.c \ + crypto/conf/conf_sap.c \ + crypto/cpt_err.c \ + crypto/cryptlib.c \ + crypto/cversion.c \ + crypto/des/cbc_cksm.c \ + crypto/des/cbc_enc.c \ + crypto/des/cfb64ede.c \ + crypto/des/cfb64enc.c \ + crypto/des/cfb_enc.c \ + crypto/des/des_enc.c \ + crypto/des/des_old.c \ + crypto/des/des_old2.c \ + crypto/des/ecb3_enc.c \ + crypto/des/ecb_enc.c \ + crypto/des/ede_cbcm_enc.c \ + crypto/des/enc_read.c \ + crypto/des/enc_writ.c \ + crypto/des/fcrypt.c \ + crypto/des/fcrypt_b.c \ + crypto/des/ofb64ede.c \ + crypto/des/ofb64enc.c \ + crypto/des/ofb_enc.c \ + crypto/des/pcbc_enc.c \ + crypto/des/qud_cksm.c \ + crypto/des/rand_key.c \ + crypto/des/read2pwd.c \ + crypto/des/rpc_enc.c \ + crypto/des/set_key.c \ + crypto/des/str2key.c \ + crypto/des/xcbc_enc.c \ + crypto/dh/dh_ameth.c \ + crypto/dh/dh_asn1.c \ + crypto/dh/dh_check.c \ + crypto/dh/dh_depr.c \ + crypto/dh/dh_err.c \ + crypto/dh/dh_gen.c \ + crypto/dh/dh_key.c \ + crypto/dh/dh_lib.c \ + crypto/dh/dh_pmeth.c \ + crypto/dsa/dsa_ameth.c \ + crypto/dsa/dsa_asn1.c \ + crypto/dsa/dsa_depr.c \ + crypto/dsa/dsa_err.c \ + crypto/dsa/dsa_gen.c \ + crypto/dsa/dsa_key.c \ + crypto/dsa/dsa_lib.c \ + crypto/dsa/dsa_ossl.c \ + crypto/dsa/dsa_pmeth.c \ + crypto/dsa/dsa_prn.c \ + crypto/dsa/dsa_sign.c \ + crypto/dsa/dsa_vrf.c \ + crypto/dso/dso_dl.c \ + crypto/dso/dso_dlfcn.c \ + crypto/dso/dso_err.c \ + crypto/dso/dso_lib.c \ + crypto/dso/dso_null.c \ + crypto/dso/dso_openssl.c \ + crypto/ebcdic.c \ + crypto/ec/ec2_mult.c \ + crypto/ec/ec2_oct.c \ + crypto/ec/ec2_smpl.c \ + crypto/ec/ec_ameth.c \ + crypto/ec/ec_asn1.c \ + crypto/ec/ec_check.c \ + crypto/ec/ec_curve.c \ + crypto/ec/ec_cvt.c \ + crypto/ec/ec_err.c \ + crypto/ec/ec_key.c \ + crypto/ec/ec_lib.c \ + crypto/ec/ec_mult.c \ + crypto/ec/ec_oct.c \ + crypto/ec/ec_pmeth.c \ + crypto/ec/ec_print.c \ + crypto/ec/eck_prn.c \ + crypto/ec/ecp_mont.c \ + crypto/ec/ecp_nist.c \ + crypto/ec/ecp_oct.c \ + crypto/ec/ecp_smpl.c \ + crypto/ecdh/ech_err.c \ + crypto/ecdh/ech_key.c \ + crypto/ecdh/ech_lib.c \ + crypto/ecdh/ech_ossl.c \ + crypto/ecdsa/ecs_asn1.c \ + crypto/ecdsa/ecs_err.c \ + crypto/ecdsa/ecs_lib.c \ + crypto/ecdsa/ecs_ossl.c \ + crypto/ecdsa/ecs_sign.c \ + crypto/ecdsa/ecs_vrf.c \ + crypto/engine/eng_all.c \ + crypto/engine/eng_cnf.c \ + crypto/engine/eng_ctrl.c \ + crypto/engine/eng_dyn.c \ + crypto/engine/eng_err.c \ + crypto/engine/eng_fat.c \ + crypto/engine/eng_init.c \ + crypto/engine/eng_lib.c \ + crypto/engine/eng_list.c \ + crypto/engine/eng_pkey.c \ + crypto/engine/eng_table.c \ + crypto/engine/tb_asnmth.c \ + crypto/engine/tb_cipher.c \ + crypto/engine/tb_dh.c \ + crypto/engine/tb_digest.c \ + crypto/engine/tb_dsa.c \ + crypto/engine/tb_ecdh.c \ + crypto/engine/tb_ecdsa.c \ + crypto/engine/tb_pkmeth.c \ + crypto/engine/tb_rand.c \ + crypto/engine/tb_rsa.c \ + crypto/engine/tb_store.c \ + crypto/err/err.c \ + crypto/err/err_all.c \ + crypto/err/err_prn.c \ + crypto/evp/bio_b64.c \ + crypto/evp/bio_enc.c \ + crypto/evp/bio_md.c \ + crypto/evp/bio_ok.c \ + crypto/evp/c_all.c \ + crypto/evp/c_allc.c \ + crypto/evp/c_alld.c \ + crypto/evp/digest.c \ + crypto/evp/e_aes.c \ + crypto/evp/e_aes_cbc_hmac_sha1.c \ + crypto/evp/e_bf.c \ + crypto/evp/e_des.c \ + crypto/evp/e_des3.c \ + crypto/evp/e_null.c \ + crypto/evp/e_old.c \ + crypto/evp/e_rc2.c \ + crypto/evp/e_rc4.c \ + crypto/evp/e_rc4_hmac_md5.c \ + crypto/evp/e_rc5.c \ + crypto/evp/e_xcbc_d.c \ + crypto/evp/encode.c \ + crypto/evp/evp_acnf.c \ + crypto/evp/evp_cnf.c \ + crypto/evp/evp_enc.c \ + crypto/evp/evp_err.c \ + crypto/evp/evp_key.c \ + crypto/evp/evp_lib.c \ + crypto/evp/evp_pbe.c \ + crypto/evp/evp_pkey.c \ + crypto/evp/m_dss.c \ + crypto/evp/m_dss1.c \ + crypto/evp/m_ecdsa.c \ + crypto/evp/m_md4.c \ + crypto/evp/m_md5.c \ + crypto/evp/m_mdc2.c \ + crypto/evp/m_null.c \ + crypto/evp/m_ripemd.c \ + crypto/evp/m_sha1.c \ + crypto/evp/m_sigver.c \ + crypto/evp/m_wp.c \ + crypto/evp/names.c \ + crypto/evp/p5_crpt.c \ + crypto/evp/p5_crpt2.c \ + crypto/evp/p_dec.c \ + crypto/evp/p_enc.c \ + crypto/evp/p_lib.c \ + crypto/evp/p_open.c \ + crypto/evp/p_seal.c \ + crypto/evp/p_sign.c \ + crypto/evp/p_verify.c \ + crypto/evp/pmeth_fn.c \ + crypto/evp/pmeth_gn.c \ + crypto/evp/pmeth_lib.c \ + crypto/ex_data.c \ + crypto/hmac/hm_ameth.c \ + crypto/hmac/hm_pmeth.c \ + crypto/hmac/hmac.c \ + crypto/krb5/krb5_asn.c \ + crypto/lhash/lh_stats.c \ + crypto/lhash/lhash.c \ + crypto/md4/md4_dgst.c \ + crypto/md4/md4_one.c \ + crypto/md5/md5_dgst.c \ + crypto/md5/md5_one.c \ + crypto/mem.c \ + crypto/mem_clr.c \ + crypto/mem_dbg.c \ + crypto/modes/cbc128.c \ + crypto/modes/ccm128.c \ + crypto/modes/cfb128.c \ + crypto/modes/ctr128.c \ + crypto/modes/gcm128.c \ + crypto/modes/ofb128.c \ + crypto/modes/xts128.c \ + crypto/o_dir.c \ + crypto/o_init.c \ + crypto/o_str.c \ + crypto/o_time.c \ + crypto/objects/o_names.c \ + crypto/objects/obj_dat.c \ + crypto/objects/obj_err.c \ + crypto/objects/obj_lib.c \ + crypto/objects/obj_xref.c \ + crypto/ocsp/ocsp_asn.c \ + crypto/ocsp/ocsp_cl.c \ + crypto/ocsp/ocsp_err.c \ + crypto/ocsp/ocsp_ext.c \ + crypto/ocsp/ocsp_ht.c \ + crypto/ocsp/ocsp_lib.c \ + crypto/ocsp/ocsp_prn.c \ + crypto/ocsp/ocsp_srv.c \ + crypto/ocsp/ocsp_vfy.c \ + crypto/pem/pem_all.c \ + crypto/pem/pem_err.c \ + crypto/pem/pem_info.c \ + crypto/pem/pem_lib.c \ + crypto/pem/pem_oth.c \ + crypto/pem/pem_pk8.c \ + crypto/pem/pem_pkey.c \ + crypto/pem/pem_seal.c \ + crypto/pem/pem_sign.c \ + crypto/pem/pem_x509.c \ + crypto/pem/pem_xaux.c \ + crypto/pem/pvkfmt.c \ + crypto/pkcs12/p12_add.c \ + crypto/pkcs12/p12_asn.c \ + crypto/pkcs12/p12_attr.c \ + crypto/pkcs12/p12_crpt.c \ + crypto/pkcs12/p12_crt.c \ + crypto/pkcs12/p12_decr.c \ + crypto/pkcs12/p12_init.c \ + crypto/pkcs12/p12_key.c \ + crypto/pkcs12/p12_kiss.c \ + crypto/pkcs12/p12_mutl.c \ + crypto/pkcs12/p12_npas.c \ + crypto/pkcs12/p12_p8d.c \ + crypto/pkcs12/p12_p8e.c \ + crypto/pkcs12/p12_utl.c \ + crypto/pkcs12/pk12err.c \ + crypto/pkcs7/pk7_asn1.c \ + crypto/pkcs7/pk7_attr.c \ + crypto/pkcs7/pk7_doit.c \ + crypto/pkcs7/pk7_lib.c \ + crypto/pkcs7/pk7_mime.c \ + crypto/pkcs7/pk7_smime.c \ + crypto/pkcs7/pkcs7err.c \ + crypto/pqueue/pqueue.c \ + crypto/rand/md_rand.c \ + crypto/rand/rand_egd.c \ + crypto/rand/rand_err.c \ + crypto/rand/rand_lib.c \ + crypto/rand/rand_unix.c \ + crypto/rand/rand_win.c \ + crypto/rand/randfile.c \ + crypto/rc2/rc2_cbc.c \ + crypto/rc2/rc2_ecb.c \ + crypto/rc2/rc2_skey.c \ + crypto/rc2/rc2cfb64.c \ + crypto/rc2/rc2ofb64.c \ + crypto/rc4/rc4_enc.c \ + crypto/rc4/rc4_skey.c \ + crypto/rc4/rc4_utl.c \ + crypto/ripemd/rmd_dgst.c \ + crypto/ripemd/rmd_one.c \ + crypto/rsa/rsa_ameth.c \ + crypto/rsa/rsa_asn1.c \ + crypto/rsa/rsa_chk.c \ + crypto/rsa/rsa_crpt.c \ + crypto/rsa/rsa_eay.c \ + crypto/rsa/rsa_err.c \ + crypto/rsa/rsa_gen.c \ + crypto/rsa/rsa_lib.c \ + crypto/rsa/rsa_none.c \ + crypto/rsa/rsa_null.c \ + crypto/rsa/rsa_oaep.c \ + crypto/rsa/rsa_pk1.c \ + crypto/rsa/rsa_pmeth.c \ + crypto/rsa/rsa_prn.c \ + crypto/rsa/rsa_pss.c \ + crypto/rsa/rsa_saos.c \ + crypto/rsa/rsa_sign.c \ + crypto/rsa/rsa_ssl.c \ + crypto/rsa/rsa_x931.c \ + crypto/sha/sha1_one.c \ + crypto/sha/sha1dgst.c \ + crypto/sha/sha256.c \ + crypto/sha/sha512.c \ + crypto/sha/sha_dgst.c \ + crypto/srp/srp_lib.c \ + crypto/srp/srp_vfy.c \ + crypto/stack/stack.c \ + crypto/ts/ts_err.c \ + crypto/txt_db/txt_db.c \ + crypto/ui/ui_compat.c \ + crypto/ui/ui_err.c \ + crypto/ui/ui_lib.c \ + crypto/ui/ui_openssl.c \ + crypto/ui/ui_util.c \ + crypto/uid.c \ + crypto/x509/by_dir.c \ + crypto/x509/by_file.c \ + crypto/x509/x509_att.c \ + crypto/x509/x509_cmp.c \ + crypto/x509/x509_d2.c \ + crypto/x509/x509_def.c \ + crypto/x509/x509_err.c \ + crypto/x509/x509_ext.c \ + crypto/x509/x509_lu.c \ + crypto/x509/x509_obj.c \ + crypto/x509/x509_r2x.c \ + crypto/x509/x509_req.c \ + crypto/x509/x509_set.c \ + crypto/x509/x509_trs.c \ + crypto/x509/x509_txt.c \ + crypto/x509/x509_v3.c \ + crypto/x509/x509_vfy.c \ + crypto/x509/x509_vpm.c \ + crypto/x509/x509cset.c \ + crypto/x509/x509name.c \ + crypto/x509/x509rset.c \ + crypto/x509/x509spki.c \ + crypto/x509/x509type.c \ + crypto/x509/x_all.c \ + crypto/x509v3/pcy_cache.c \ + crypto/x509v3/pcy_data.c \ + crypto/x509v3/pcy_lib.c \ + crypto/x509v3/pcy_map.c \ + crypto/x509v3/pcy_node.c \ + crypto/x509v3/pcy_tree.c \ + crypto/x509v3/v3_akey.c \ + crypto/x509v3/v3_akeya.c \ + crypto/x509v3/v3_alt.c \ + crypto/x509v3/v3_bcons.c \ + crypto/x509v3/v3_bitst.c \ + crypto/x509v3/v3_conf.c \ + crypto/x509v3/v3_cpols.c \ + crypto/x509v3/v3_crld.c \ + crypto/x509v3/v3_enum.c \ + crypto/x509v3/v3_extku.c \ + crypto/x509v3/v3_genn.c \ + crypto/x509v3/v3_ia5.c \ + crypto/x509v3/v3_info.c \ + crypto/x509v3/v3_int.c \ + crypto/x509v3/v3_lib.c \ + crypto/x509v3/v3_ncons.c \ + crypto/x509v3/v3_ocsp.c \ + crypto/x509v3/v3_pci.c \ + crypto/x509v3/v3_pcia.c \ + crypto/x509v3/v3_pcons.c \ + crypto/x509v3/v3_pku.c \ + crypto/x509v3/v3_pmaps.c \ + crypto/x509v3/v3_prn.c \ + crypto/x509v3/v3_purp.c \ + crypto/x509v3/v3_skey.c \ + crypto/x509v3/v3_sxnet.c \ + crypto/x509v3/v3_utl.c \ + crypto/x509v3/v3err.c \ + +common_c_includes := \ + openssl/. \ + openssl/crypto \ + openssl/crypto/asn1 \ + openssl/crypto/evp \ + openssl/crypto/modes \ + openssl/include \ + openssl/include/openssl \ + +arm_cflags := \ + -DAES_ASM \ + -DBSAES_ASM \ + -DGHASH_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + +arm_src_files := \ + crypto/aes/asm/aes-armv4.S \ + crypto/aes/asm/bsaes-armv7.S \ + crypto/armcap.c \ + crypto/armv4cpuid.S \ + crypto/bn/asm/armv4-gf2m.S \ + crypto/bn/asm/armv4-mont.S \ + crypto/modes/asm/ghash-armv4.S \ + crypto/sha/asm/sha1-armv4-large.S \ + crypto/sha/asm/sha256-armv4.S \ + crypto/sha/asm/sha512-armv4.S \ + +arm_exclude_files := \ + crypto/aes/aes_core.c \ + crypto/mem_clr.c \ + +arm64_cflags := \ + -DOPENSSL_NO_ASM \ + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := \ + -DAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ + -DDES_UNROLL \ + -DGHASH_ASM \ + -DMD5_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_BN_ASM_PART_WORDS \ + -DOPENSSL_CPUID_OBJ \ + -DOPENSSL_IA32_SSE2 \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + -DVPAES_ASM \ + +x86_src_files := \ + crypto/aes/asm/aes-586.S \ + crypto/aes/asm/aesni-x86.S \ + crypto/aes/asm/vpaes-x86.S \ + crypto/bf/asm/bf-586.S \ + crypto/bn/asm/bn-586.S \ + crypto/bn/asm/co-586.S \ + crypto/bn/asm/x86-gf2m.S \ + crypto/bn/asm/x86-mont.S \ + crypto/des/asm/crypt586.S \ + crypto/des/asm/des-586.S \ + crypto/md5/asm/md5-586.S \ + crypto/modes/asm/ghash-x86.S \ + crypto/sha/asm/sha1-586.S \ + crypto/sha/asm/sha256-586.S \ + crypto/sha/asm/sha512-586.S \ + crypto/x86cpuid.S \ + +x86_exclude_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_core.c \ + crypto/bf/bf_enc.c \ + crypto/bn/bn_asm.c \ + crypto/des/des_enc.c \ + crypto/des/fcrypt_b.c \ + crypto/mem_clr.c \ + +x86_64_cflags := \ + -DAES_ASM \ + -DBSAES_ASM \ + -DDES_PTR \ + -DDES_RISC1 \ + -DDES_UNROLL \ + -DGHASH_ASM \ + -DMD5_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DOPENSSL_BN_ASM_MONT5 \ + -DOPENSSL_CPUID_OBJ \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + -DVPAES_ASM \ + +x86_64_src_files := \ + crypto/aes/asm/aes-x86_64.S \ + crypto/aes/asm/aesni-sha1-x86_64.S \ + crypto/aes/asm/aesni-x86_64.S \ + crypto/aes/asm/bsaes-x86_64.S \ + crypto/aes/asm/vpaes-x86_64.S \ + crypto/bn/asm/modexp512-x86_64.S \ + crypto/bn/asm/x86_64-gcc.c \ + crypto/bn/asm/x86_64-gf2m.S \ + crypto/bn/asm/x86_64-mont.S \ + crypto/bn/asm/x86_64-mont5.S \ + crypto/md5/asm/md5-x86_64.S \ + crypto/modes/asm/ghash-x86_64.S \ + crypto/rc4/asm/rc4-md5-x86_64.S \ + crypto/rc4/asm/rc4-x86_64.S \ + crypto/sha/asm/sha1-x86_64.S \ + crypto/sha/asm/sha256-x86_64.S \ + crypto/sha/asm/sha512-x86_64.S \ + crypto/x86_64cpuid.S \ + +x86_64_exclude_files := \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_core.c \ + crypto/bn/bn_asm.c \ + crypto/mem_clr.c \ + crypto/rc4/rc4_enc.c \ + crypto/rc4/rc4_skey.c \ + +mips_cflags := \ + -DAES_ASM \ + -DOPENSSL_BN_ASM_MONT \ + -DSHA1_ASM \ + -DSHA256_ASM \ + +mips_src_files := \ + crypto/aes/asm/aes-mips.S \ + crypto/bn/asm/bn-mips.S \ + crypto/bn/asm/mips-mont.S \ + crypto/sha/asm/sha1-mips.S \ + crypto/sha/asm/sha256-mips.S \ + +mips_exclude_files := \ + crypto/aes/aes_core.c \ + crypto/bn/bn_asm.c \ + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) + +LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files)) +LOCAL_CFLAGS_arm += $(arm_cflags) + +LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files)) +LOCAL_CFLAGS_arm64 += $(arm64_cflags) + +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86 += $(x86_cflags) + +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) + +LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files)) +LOCAL_CFLAGS_mips += $(mips_cflags) diff --git a/app/openssl/Crypto-config-trusty.mk b/app/openssl/Crypto-config-trusty.mk new file mode 100644 index 00000000..dc5b12c2 --- /dev/null +++ b/app/openssl/Crypto-config-trusty.mk @@ -0,0 +1,262 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-trusty.mk + +common_cflags := \ + -DGETPID_IS_MEANINGLESS \ + -DNO_WINDOWS_BRAINDEATH \ + +common_src_files := \ + Crypto-config.mk \ + crypto/aes/aes_cbc.c \ + crypto/aes/aes_misc.c \ + crypto/asn1/a_bitstr.c \ + crypto/asn1/a_d2i_fp.c \ + crypto/asn1/a_int.c \ + crypto/asn1/a_object.c \ + crypto/asn1/a_octet.c \ + crypto/asn1/a_type.c \ + crypto/asn1/ameth_lib.c \ + crypto/asn1/asn1_lib.c \ + crypto/asn1/asn_pack.c \ + crypto/asn1/d2i_pr.c \ + crypto/asn1/f_int.c \ + crypto/asn1/i2d_pr.c \ + crypto/asn1/p8_pkey.c \ + crypto/asn1/t_pkey.c \ + crypto/asn1/t_x509.c \ + crypto/asn1/tasn_dec.c \ + crypto/asn1/tasn_enc.c \ + crypto/asn1/tasn_fre.c \ + crypto/asn1/tasn_new.c \ + crypto/asn1/tasn_typ.c \ + crypto/asn1/tasn_utl.c \ + crypto/asn1/x_algor.c \ + crypto/asn1/x_attrib.c \ + crypto/asn1/x_bignum.c \ + crypto/asn1/x_long.c \ + crypto/asn1/x_pubkey.c \ + crypto/asn1/x_sig.c \ + crypto/bio/b_print.c \ + crypto/bio/bio_lib.c \ + crypto/bio/bss_mem.c \ + crypto/bn/bn_add.c \ + crypto/bn/bn_asm.c \ + crypto/bn/bn_blind.c \ + crypto/bn/bn_ctx.c \ + crypto/bn/bn_div.c \ + crypto/bn/bn_exp.c \ + crypto/bn/bn_exp2.c \ + crypto/bn/bn_gcd.c \ + crypto/bn/bn_gf2m.c \ + crypto/bn/bn_kron.c \ + crypto/bn/bn_lib.c \ + crypto/bn/bn_mod.c \ + crypto/bn/bn_mont.c \ + crypto/bn/bn_mul.c \ + crypto/bn/bn_nist.c \ + crypto/bn/bn_prime.c \ + crypto/bn/bn_print.c \ + crypto/bn/bn_rand.c \ + crypto/bn/bn_recp.c \ + crypto/bn/bn_shift.c \ + crypto/bn/bn_sqr.c \ + crypto/bn/bn_sqrt.c \ + crypto/bn/bn_word.c \ + crypto/buffer/buf_str.c \ + crypto/buffer/buffer.c \ + crypto/cmac/cm_ameth.c \ + crypto/cmac/cm_pmeth.c \ + crypto/cmac/cmac.c \ + crypto/cryptlib.c \ + crypto/dh/dh_ameth.c \ + crypto/dh/dh_asn1.c \ + crypto/dh/dh_check.c \ + crypto/dh/dh_gen.c \ + crypto/dh/dh_key.c \ + crypto/dh/dh_lib.c \ + crypto/dh/dh_pmeth.c \ + crypto/dsa/dsa_ameth.c \ + crypto/dsa/dsa_asn1.c \ + crypto/dsa/dsa_gen.c \ + crypto/dsa/dsa_key.c \ + crypto/dsa/dsa_lib.c \ + crypto/dsa/dsa_ossl.c \ + crypto/dsa/dsa_pmeth.c \ + crypto/dsa/dsa_sign.c \ + crypto/dsa/dsa_vrf.c \ + crypto/ec/ec2_mult.c \ + crypto/ec/ec2_oct.c \ + crypto/ec/ec2_smpl.c \ + crypto/ec/ec_ameth.c \ + crypto/ec/ec_asn1.c \ + crypto/ec/ec_curve.c \ + crypto/ec/ec_cvt.c \ + crypto/ec/ec_key.c \ + crypto/ec/ec_lib.c \ + crypto/ec/ec_mult.c \ + crypto/ec/ec_oct.c \ + crypto/ec/ec_pmeth.c \ + crypto/ec/ec_print.c \ + crypto/ec/eck_prn.c \ + crypto/ec/ecp_mont.c \ + crypto/ec/ecp_nist.c \ + crypto/ec/ecp_oct.c \ + crypto/ec/ecp_smpl.c \ + crypto/ecdh/ech_key.c \ + crypto/ecdh/ech_lib.c \ + crypto/ecdh/ech_ossl.c \ + crypto/ecdsa/ecs_asn1.c \ + crypto/ecdsa/ecs_lib.c \ + crypto/ecdsa/ecs_ossl.c \ + crypto/ecdsa/ecs_sign.c \ + crypto/ecdsa/ecs_vrf.c \ + crypto/engine/eng_init.c \ + crypto/engine/eng_lib.c \ + crypto/engine/eng_table.c \ + crypto/engine/tb_asnmth.c \ + crypto/engine/tb_cipher.c \ + crypto/engine/tb_dh.c \ + crypto/engine/tb_digest.c \ + crypto/engine/tb_dsa.c \ + crypto/engine/tb_ecdh.c \ + crypto/engine/tb_ecdsa.c \ + crypto/engine/tb_pkmeth.c \ + crypto/engine/tb_rand.c \ + crypto/engine/tb_rsa.c \ + crypto/err/err.c \ + crypto/evp/digest.c \ + crypto/evp/e_aes.c \ + crypto/evp/evp_enc.c \ + crypto/evp/evp_lib.c \ + crypto/evp/evp_pkey.c \ + crypto/evp/m_sha1.c \ + crypto/evp/m_sigver.c \ + crypto/evp/names.c \ + crypto/evp/p_lib.c \ + crypto/evp/pmeth_fn.c \ + crypto/evp/pmeth_gn.c \ + crypto/evp/pmeth_lib.c \ + crypto/ex_data.c \ + crypto/hmac/hm_ameth.c \ + crypto/hmac/hm_pmeth.c \ + crypto/hmac/hmac.c \ + crypto/lhash/lhash.c \ + crypto/mem.c \ + crypto/mem_clr.c \ + crypto/mem_dbg.c \ + crypto/modes/cbc128.c \ + crypto/modes/ctr128.c \ + crypto/objects/o_names.c \ + crypto/objects/obj_dat.c \ + crypto/objects/obj_xref.c \ + crypto/pkcs7/pk7_lib.c \ + crypto/rand/md_rand.c \ + crypto/rand/rand_lib.c \ + crypto/rsa/rsa_ameth.c \ + crypto/rsa/rsa_asn1.c \ + crypto/rsa/rsa_chk.c \ + crypto/rsa/rsa_crpt.c \ + crypto/rsa/rsa_eay.c \ + crypto/rsa/rsa_gen.c \ + crypto/rsa/rsa_lib.c \ + crypto/rsa/rsa_none.c \ + crypto/rsa/rsa_oaep.c \ + crypto/rsa/rsa_pk1.c \ + crypto/rsa/rsa_pmeth.c \ + crypto/rsa/rsa_pss.c \ + crypto/rsa/rsa_saos.c \ + crypto/rsa/rsa_sign.c \ + crypto/rsa/rsa_ssl.c \ + crypto/rsa/rsa_x931.c \ + crypto/sha/sha1_one.c \ + crypto/sha/sha1dgst.c \ + crypto/sha/sha256.c \ + crypto/sha/sha512.c \ + crypto/stack/stack.c \ + crypto/x509/x_all.c \ + crypto/x509v3/v3_utl.c \ + +common_c_includes := \ + external/openssl/. \ + external/openssl/crypto \ + external/openssl/crypto/asn1 \ + external/openssl/crypto/evp \ + external/openssl/crypto/modes \ + external/openssl/include \ + external/openssl/include/openssl \ + +arm_cflags := \ + -DAES_ASM \ + -DGHASH_ASM \ + -DOPENSSL_BN_ASM_GF2m \ + -DOPENSSL_BN_ASM_MONT \ + -DSHA1_ASM \ + -DSHA256_ASM \ + -DSHA512_ASM \ + +arm_src_files := \ + crypto/aes/asm/aes-armv4.S \ + crypto/bn/asm/armv4-gf2m.S \ + crypto/bn/asm/armv4-mont.S \ + crypto/sha/asm/sha1-armv4-large.S \ + crypto/sha/asm/sha256-armv4.S \ + crypto/sha/asm/sha512-armv4.S \ + +arm_exclude_files := + +arm64_cflags := + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := + +x86_src_files := + +x86_exclude_files := + +x86_64_cflags := + +x86_64_src_files := + +x86_64_exclude_files := + +mips_cflags := + +mips_src_files := + +mips_exclude_files := + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) + +LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files)) +LOCAL_CFLAGS_arm += $(arm_cflags) + +LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files)) +LOCAL_CFLAGS_arm64 += $(arm64_cflags) + +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86 += $(x86_cflags) + +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) + +LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files)) +LOCAL_CFLAGS_mips += $(mips_cflags) diff --git a/app/openssl/Crypto.mk b/app/openssl/Crypto.mk new file mode 100644 index 00000000..4214b91e --- /dev/null +++ b/app/openssl/Crypto.mk @@ -0,0 +1,76 @@ +####################################### +# target static library +include $(CLEAR_VARS) +include $(LOCAL_PATH)/ndk-build-clear.mk +LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) + +# The static library should be used in only unbundled apps +# and we don't have clang in unbundled build yet. +LOCAL_SDK_VERSION := 9 + +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE:= libcrypto_static +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +include $(LOCAL_PATH)/Crypto-config-target.mk +include $(LOCAL_PATH)/android-config.mk +include $(LOCAL_PATH)/ndk-build.mk + +# Replace cflags with static-specific cflags so we dont build in libdl deps +LOCAL_CFLAGS_32 := $(openssl_cflags_static_32) +LOCAL_CFLAGS_64 := $(openssl_cflags_static_64) +include $(BUILD_STATIC_LIBRARY) + +####################################### +# target shared library +include $(CLEAR_VARS) +include $(LOCAL_PATH)/ndk-build-clear.mk +LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) + +# If we're building an unbundled build, don't try to use clang since it's not +# in the NDK yet. This can be removed when a clang version that is fast enough +# in the NDK. +ifeq (,$(TARGET_BUILD_APPS)) +LOCAL_CLANG := true +ifeq ($(HOST_OS), darwin_XXX) +LOCAL_ASFLAGS += -no-integrated-as +LOCAL_CFLAGS += -no-integrated-as +endif +else +LOCAL_SDK_VERSION := 9 +endif +LOCAL_LDFLAGS += -ldl + +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE:= libcrypto +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +include $(LOCAL_PATH)/Crypto-config-target.mk +include $(LOCAL_PATH)/android-config.mk +include $(LOCAL_PATH)/ndk-build.mk +include $(BUILD_SHARED_LIBRARY) + +####################################### +# host shared library +# include $(CLEAR_VARS) +# LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) +# LOCAL_CFLAGS += -DPURIFY +# LOCAL_LDLIBS += -ldl +# LOCAL_MODULE_TAGS := optional +# LOCAL_MODULE:= libcrypto-host +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +# include $(LOCAL_PATH)/Crypto-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_HOST_SHARED_LIBRARY) + +######################################## +# host static library, which is used by some SDK tools. + +# include $(CLEAR_VARS) +# LOCAL_SHARED_LIBRARIES := $(log_shared_libraries) +# LOCAL_CFLAGS += -DPURIFY +# LOCAL_LDLIBS += -ldl +# LOCAL_MODULE_TAGS := optional +# LOCAL_MODULE:= libcrypto_static +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk +# include $(LOCAL_PATH)/Crypto-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/app/openssl/README.android b/app/openssl/README.android index e93fb005..a7c3cc10 100644 --- a/app/openssl/README.android +++ b/app/openssl/README.android @@ -23,7 +23,6 @@ The following steps are recommended for porting new OpenSSL versions. 2) Update the variables in openssl.config and openssl.version as appropriate. At the very least you will need to update the openssl.version. - Similarly update ThirdPartyProject.prop. 3) Run: @@ -54,17 +53,23 @@ The following steps are recommended for porting new OpenSSL versions. # Build and sync libcore tests (croot && cd libcore && mm -j16 snod && adb remount && adb sync) # Run tests from libcore - (croot && vogar --classpath out/target/common/obj/JAVA_LIBRARIES/core-tests-support_intermediates/classes.jar --classpath out/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jar javax.net.ssl tests.api.javax.net) + (croot && vogar --classpath out/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jar javax.net.ssl tests.api.javax.net) # Run tests from Harmony - (croot && vogar --classpath harmony_tests.jar tests.api.java.math.BigIntegerTest org.apache.harmony.tests.java.math) + (croot && vogar --classpath out/target/common/obj/JAVA_LIBRARIES/apache-harmony-tests_intermediates/classes.jar tests.api.java.math.BigIntegerTest org.apache.harmony.tests.java.math) # try an https website adb shell am start https://online.citibank.com # confirm result in browser The vogar tool can be found externally at http://code.google.com/p/vogar/ - Within Google it can be run with ~dalvik-prebuild/vogar/bin/vogar - harmony_tests.jar is built from Subversion http://harmony.apache.org/ - Within Google it can be found at ~dalvik-prebuild/bin/harmony_tests.jar + Quick installation instructions (without rebuilding from source): + VOGAR=$HOME/vogar + svn co http://vogar.googlecode.com/svn/trunk/ $VOGAR + mkdir -p $VOGAR/build/ + curl -o $VOGAR/build/vogar.jar https://vogar.googlecode.com/files/vogar.jar + PATH=$PATH:$VOGAR/bin + + Within Google, you can find it under: + /home/dalvik-prebuild/vogar/bin/vogar # You can also run openssl s_server as a test server on the device: adb push ./android.testssl/CAss.cnf /sdcard/CAss.cnf @@ -76,17 +81,7 @@ The following steps are recommended for porting new OpenSSL versions. m -j16 -Optionally, check whether build flags (located in android-config.mk -need to be updated. Doing this step will help ensure that the -compiled library is appropriately optimized for speed and size. To -update build flags: - -a) source openssl.config -b) tar -zxf openssl-*.tar.gz -c) cd openssl-*/ -d) ./Configure $CONFIGURE_ARGS -e) examine Makefile and compare with ../android-config.mk -f) modify ../openssl.config as appropriate and go to step 3) above. - -Alternatively, ."/import_openssl.sh import" now prints the -post-Configure Makefile for review before deleting in on import. +Optionally, check whether build flags (located in CONFIGURE_ARGS in +openssl.config, plus some extras in android-config.mk), need to be updated. +Doing this step will help ensure that the compiled library is appropriately +optimized for speed and size. diff --git a/app/openssl/Ssl-config-host.mk b/app/openssl/Ssl-config-host.mk new file mode 100644 index 00000000..95035487 --- /dev/null +++ b/app/openssl/Ssl-config-host.mk @@ -0,0 +1,113 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Ssl-config-host.mk + +common_cflags := + +common_src_files := \ + ssl/bio_ssl.c \ + ssl/d1_both.c \ + ssl/d1_enc.c \ + ssl/d1_lib.c \ + ssl/d1_pkt.c \ + ssl/d1_srtp.c \ + ssl/kssl.c \ + ssl/s23_clnt.c \ + ssl/s23_lib.c \ + ssl/s23_meth.c \ + ssl/s23_pkt.c \ + ssl/s23_srvr.c \ + ssl/s2_clnt.c \ + ssl/s2_enc.c \ + ssl/s2_lib.c \ + ssl/s2_meth.c \ + ssl/s2_pkt.c \ + ssl/s2_srvr.c \ + ssl/s3_both.c \ + ssl/s3_cbc.c \ + ssl/s3_clnt.c \ + ssl/s3_enc.c \ + ssl/s3_lib.c \ + ssl/s3_meth.c \ + ssl/s3_pkt.c \ + ssl/s3_srvr.c \ + ssl/ssl_algs.c \ + ssl/ssl_asn1.c \ + ssl/ssl_cert.c \ + ssl/ssl_ciph.c \ + ssl/ssl_err.c \ + ssl/ssl_err2.c \ + ssl/ssl_lib.c \ + ssl/ssl_rsa.c \ + ssl/ssl_sess.c \ + ssl/ssl_stat.c \ + ssl/ssl_txt.c \ + ssl/t1_clnt.c \ + ssl/t1_enc.c \ + ssl/t1_lib.c \ + ssl/t1_meth.c \ + ssl/t1_reneg.c \ + ssl/t1_srvr.c \ + ssl/tls_srp.c \ + +common_c_includes := \ + external/openssl/. \ + external/openssl/crypto \ + external/openssl/include \ + +arm_cflags := + +arm_src_files := + +arm_exclude_files := + +arm64_cflags := + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := + +x86_src_files := + +x86_exclude_files := + +x86_64_cflags := + +x86_64_src_files := + +x86_64_exclude_files := + +mips_cflags := + +mips_src_files := + +mips_exclude_files := + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes) + +ifeq ($(HOST_OS),linux) +LOCAL_CFLAGS_x86 += $(x86_cflags) +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files)) +else +$(warning Unknown host OS $(HOST_OS)) +LOCAL_SRC_FILES += $(common_src_files) +endif diff --git a/app/openssl/Ssl-config-target.mk b/app/openssl/Ssl-config-target.mk new file mode 100644 index 00000000..32439d3f --- /dev/null +++ b/app/openssl/Ssl-config-target.mk @@ -0,0 +1,118 @@ +# Auto-generated - DO NOT EDIT! +# To regenerate, edit openssl.config, then run: +# ./import_openssl.sh import /path/to/openssl-1.0.1g.tar.gz +# +# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_$(TARGET_ARCH) +# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_$(TARGET_ARCH) +# LOCAL_CFLAGS_$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Ssl-config-target.mk + +common_cflags := + +common_src_files := \ + ssl/bio_ssl.c \ + ssl/d1_both.c \ + ssl/d1_enc.c \ + ssl/d1_lib.c \ + ssl/d1_pkt.c \ + ssl/d1_srtp.c \ + ssl/kssl.c \ + ssl/s23_clnt.c \ + ssl/s23_lib.c \ + ssl/s23_meth.c \ + ssl/s23_pkt.c \ + ssl/s23_srvr.c \ + ssl/s2_clnt.c \ + ssl/s2_enc.c \ + ssl/s2_lib.c \ + ssl/s2_meth.c \ + ssl/s2_pkt.c \ + ssl/s2_srvr.c \ + ssl/s3_both.c \ + ssl/s3_cbc.c \ + ssl/s3_clnt.c \ + ssl/s3_enc.c \ + ssl/s3_lib.c \ + ssl/s3_meth.c \ + ssl/s3_pkt.c \ + ssl/s3_srvr.c \ + ssl/ssl_algs.c \ + ssl/ssl_asn1.c \ + ssl/ssl_cert.c \ + ssl/ssl_ciph.c \ + ssl/ssl_err.c \ + ssl/ssl_err2.c \ + ssl/ssl_lib.c \ + ssl/ssl_rsa.c \ + ssl/ssl_sess.c \ + ssl/ssl_stat.c \ + ssl/ssl_txt.c \ + ssl/t1_clnt.c \ + ssl/t1_enc.c \ + ssl/t1_lib.c \ + ssl/t1_meth.c \ + ssl/t1_reneg.c \ + ssl/t1_srvr.c \ + ssl/tls_srp.c \ + +common_c_includes := \ + openssl/. \ + openssl/crypto \ + openssl/include \ + +arm_cflags := + +arm_src_files := + +arm_exclude_files := + +arm64_cflags := + +arm64_src_files := + +arm64_exclude_files := + +x86_cflags := + +x86_src_files := + +x86_exclude_files := + +x86_64_cflags := + +x86_64_src_files := + +x86_64_exclude_files := + +mips_cflags := + +mips_src_files := + +mips_exclude_files := + + +LOCAL_CFLAGS += $(common_cflags) +LOCAL_C_INCLUDES += $(common_c_includes) + +LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files)) +LOCAL_CFLAGS_arm += $(arm_cflags) + +LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files)) +LOCAL_CFLAGS_arm64 += $(arm64_cflags) + +LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files)) +LOCAL_CFLAGS_x86 += $(x86_cflags) + +LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files)) +LOCAL_CFLAGS_x86_64 += $(x86_64_cflags) + +LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files)) +LOCAL_CFLAGS_mips += $(mips_cflags) diff --git a/app/openssl/Ssl.mk b/app/openssl/Ssl.mk new file mode 100644 index 00000000..8ce82d9b --- /dev/null +++ b/app/openssl/Ssl.mk @@ -0,0 +1,66 @@ +####################################### +# target static library +include $(CLEAR_VARS) +include $(LOCAL_PATH)/ndk-build-clear.mk + +# The static library should be used in only unbundled apps +# and we don't have clang in unbundled build yet. +LOCAL_SDK_VERSION := 9 + +LOCAL_SRC_FILES += $(target_src_files) +LOCAL_CFLAGS += $(target_c_flags) +LOCAL_C_INCLUDES += $(target_c_includes) +LOCAL_SHARED_LIBRARIES = $(log_shared_libraries) +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE:= libssl_static +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk +include $(LOCAL_PATH)/Ssl-config-target.mk +include $(LOCAL_PATH)/android-config.mk +include $(LOCAL_PATH)/ndk-build.mk +include $(BUILD_STATIC_LIBRARY) + +####################################### +# target shared library +include $(CLEAR_VARS) +include $(LOCAL_PATH)/ndk-build-clear.mk + +# If we're building an unbundled build, don't try to use clang since it's not +# in the NDK yet. This can be removed when a clang version that is fast enough +# in the NDK. +ifeq (,$(TARGET_BUILD_APPS)) +LOCAL_CLANG := true +else +LOCAL_SDK_VERSION := 9 +endif + +LOCAL_SHARED_LIBRARIES += libcrypto $(log_shared_libraries) +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE:= libssl +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk +include $(LOCAL_PATH)/Ssl-config-target.mk +include $(LOCAL_PATH)/android-config.mk +include $(LOCAL_PATH)/ndk-build.mk +include $(BUILD_SHARED_LIBRARY) + +# ####################################### +# # host shared library +# include $(CLEAR_VARS) +# LOCAL_SHARED_LIBRARIES += libcrypto-host $(log_shared_libraries) +# LOCAL_MODULE_TAGS := optional +# LOCAL_MODULE:= libssl-host +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk +# include $(LOCAL_PATH)/Ssl-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_HOST_SHARED_LIBRARY) + +# ####################################### +# # ssltest +# include $(CLEAR_VARS) +# LOCAL_SRC_FILES:= ssl/ssltest.c +# LOCAL_SHARED_LIBRARIES := libssl libcrypto $(log_shared_libraries) +# LOCAL_MODULE:= ssltest +# LOCAL_MODULE_TAGS := optional +# LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk +# include $(LOCAL_PATH)/Ssl-config-host.mk +# include $(LOCAL_PATH)/android-config.mk +# include $(BUILD_EXECUTABLE) diff --git a/app/openssl/ThirdPartyProject.prop b/app/openssl/ThirdPartyProject.prop deleted file mode 100644 index 34ad609a..00000000 --- a/app/openssl/ThirdPartyProject.prop +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright 2010 Google Inc. All Rights Reserved. -#Fri Jul 16 10:03:09 PDT 2010 -currentVersion=1.0.0e -version=1.0.0e -isNative=true -feedurl=http\://www.openssl.org/ -name=openssl -keywords=openssl -onDevice=true -homepage=http\://www.openssl.org/ diff --git a/app/openssl/android-config.mk b/app/openssl/android-config.mk index d76d6e30..2a091130 100644 --- a/app/openssl/android-config.mk +++ b/app/openssl/android-config.mk @@ -1,17 +1,48 @@ # -# These flags represent the build-time configuration of openssl for android +# These flags represent the build-time configuration of OpenSSL for android # -# They were pruned from the "Makefile" generated by running ./Configure from import_openssl.sh +# The value of $(openssl_cflags) was pruned from the Makefile generated +# by running ./Configure from import_openssl.sh. # +# This script performs minor but required patching for the Android build. +# + +# Directories for ENGINE shared libraries +openssl_cflags_32 += \ + -DOPENSSLDIR="\"/system/lib/ssl\"" \ + -DENGINESDIR="\"/system/lib/ssl/engines\"" +openssl_cflags_static_32 += \ + -DOPENSSLDIR="\"/system/lib/ssl\"" \ + -DENGINESDIR="\"/system/lib/ssl/engines\"" +openssl_cflags_64 += \ + -DOPENSSLDIR="\"/system/lib64/ssl\"" \ + -DENGINESDIR="\"/system/lib64/ssl/engines\"" +openssl_cflags_static_64 += \ + -DOPENSSLDIR="\"/system/lib64/ssl\"" \ + -DENGINESDIR="\"/system/lib64/ssl/engines\"" -# From CLFAG= -LOCAL_CFLAGS += -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -DL_ENDIAN #-DTERMIO +# Intentionally excluded http://b/7079965 +ifneq (,$(filter -DZLIB, $(openssl_cflags_32) $(openssl_cflags_64) \ + $(openssl_cflags_static_32) $(openssl_cflags_static_64))) +$(error ZLIB should not be enabled in openssl configuration) +endif -# From DEPFLAG= -LOCAL_CFLAGS += -DOPENSSL_NO_CAMELLIA -DOPENSSL_NO_CAPIENG -DOPENSSL_NO_CAST -DOPENSSL_NO_CMS -DOPENSSL_NO_GMP -DOPENSSL_NO_IDEA -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_MDC2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_SHA0 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SEED -DOPENSSL_NO_STORE -DOPENSSL_NO_WHIRLPOOL +LOCAL_CFLAGS_32 += $(openssl_cflags_32) +LOCAL_CFLAGS_64 += $(openssl_cflags_64) -# Extra -LOCAL_CFLAGS += -DOPENSSL_NO_HW -DOPENSSL_NO_ENGINE -DZLIB +LOCAL_CFLAGS_32 := $(filter-out -DTERMIO, $(LOCAL_CFLAGS_32)) +LOCAL_CFLAGS_64 := $(filter-out -DTERMIO, $(LOCAL_CFLAGS_64)) +# filter out static flags too +openssl_cflags_static_32 := $(filter-out -DTERMIO, $(openssl_cflags_static_32)) +openssl_cflags_static_64 := $(filter-out -DTERMIO, $(openssl_cflags_static_64)) + +ifeq ($(HOST_OS),windows) +LOCAL_CFLAGS_32 := $(filter-out -DDSO_DLFCN -DHAVE_DLFCN_H,$(LOCAL_CFLAGS_32)) +LOCAL_CFLAGS_64 := $(filter-out -DDSO_DLFCN -DHAVE_DLFCN_H,$(LOCAL_CFLAGS_64)) +endif # Debug # LOCAL_CFLAGS += -DCIPHER_DEBUG + +# Add clang here when it works on host +# LOCAL_CLANG := true diff --git a/app/openssl/android.testssl/CAss.cnf b/app/openssl/android.testssl/CAss.cnf index 1173c080..77c01c30 100644 --- a/app/openssl/android.testssl/CAss.cnf +++ b/app/openssl/android.testssl/CAss.cnf @@ -7,7 +7,7 @@ RANDFILE = /sdcard/android.testssl/.rnd #################################################################### [ req ] -default_bits = 512 +default_bits = 2048 default_keyfile = keySS.pem distinguished_name = req_distinguished_name encrypt_rsa_key = no diff --git a/app/openssl/android.testssl/Uss.cnf b/app/openssl/android.testssl/Uss.cnf index 56dcdd5e..317ab6de 100644 --- a/app/openssl/android.testssl/Uss.cnf +++ b/app/openssl/android.testssl/Uss.cnf @@ -7,11 +7,11 @@ RANDFILE = /sdcard/android.testssl/.rnd #################################################################### [ req ] -default_bits = 512 +default_bits = 2048 default_keyfile = keySS.pem distinguished_name = req_distinguished_name encrypt_rsa_key = no -default_md = md2 +default_md = sha256 [ req_distinguished_name ] countryName = Country Name (2 letter code) diff --git a/app/openssl/android.testssl/server2.pem b/app/openssl/android.testssl/server2.pem index 8bb66419..a3927cf7 100644 --- a/app/openssl/android.testssl/server2.pem +++ b/app/openssl/android.testssl/server2.pem @@ -1,376 +1,52 @@ -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Server test cert (1024 bit) ------BEGIN CERTIFICATE----- -MIICLjCCAZcCAQEwDQYJKoZIhvcNAQEEBQAwWzELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYD -VQQDExJUZXN0IENBICgxMDI0IGJpdCkwHhcNOTcwNjA5MTM1NzU0WhcNOTgwNjA5 -MTM1NzU0WjBkMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDEaMBgG -A1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxJDAiBgNVBAMTG1NlcnZlciB0ZXN0IGNl -cnQgKDEwMjQgYml0KTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAsxH1PBPm -RkxrR11eV4bzNi4N9n11CI8nV29+ARlT1+qDe/mjVUvXlmsr1v/vf71G9GgqopSa -6RXrICLVdk/FYYYzhPvl1M+OrjaXDFO8BzBAF1Lnz6c7aRZvGRJNrRSr2nZEkqDf -JW9dY7r2VZEpD5QeuaRYUnuECkqeieB65GMCAwEAATANBgkqhkiG9w0BAQQFAAOB -gQCWsOta6C0wiVzXz8wPmJKyTrurMlgUss2iSuW9366iwofZddsNg7FXniMzkIf6 -dp7jnmWZwKZ9cXsNUS2o4OL07qOk2HOywC0YsNZQsOBu1CBTYYkIefDiKFL1zQHh -8lwwNd4NP+OE3NzUNkCfh4DnFfg9WHkXUlD5UpxNRJ4gJA== ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIICXgIBAAKBgQCzEfU8E+ZGTGtHXV5XhvM2Lg32fXUIjydXb34BGVPX6oN7+aNV -S9eWayvW/+9/vUb0aCqilJrpFesgItV2T8VhhjOE++XUz46uNpcMU7wHMEAXUufP -pztpFm8ZEk2tFKvadkSSoN8lb11juvZVkSkPlB65pFhSe4QKSp6J4HrkYwIDAQAB -AoGBAKy8jvb0Lzby8q11yNLf7+78wCVdYi7ugMHcYA1JVFK8+zb1WfSm44FLQo/0 -dSChAjgz36TTexeLODPYxleJndjVcOMVzsLJjSM8dLpXsTS4FCeMbhw2s2u+xqKY -bbPWfk+HOTyJjfnkcC5Nbg44eOmruq0gSmBeUXVM5UntlTnxAkEA7TGCA3h7kx5E -Bl4zl2pc3gPAGt+dyfk5Po9mGJUUXhF5p2zueGmYWW74TmOWB1kzt4QRdYMzFePq -zfDNXEa1CwJBAMFErdY0xp0UJ13WwBbUTk8rujqQdHtjw0klhpbuKkjxu2hN0wwM -6p0D9qxF7JHaghqVRI0fAW/EE0OzdHMR9QkCQQDNR26dMFXKsoPu+vItljj/UEGf -QG7gERiQ4yxaFBPHgdpGo0kT31eh9x9hQGDkxTe0GNG/YSgCRvm8+C3TMcKXAkBD -dhGn36wkUFCddMSAM4NSJ1VN8/Z0y5HzCmI8dM3VwGtGMUQlxKxwOl30LEQzdS5M -0SWojNYXiT2gOBfBwtbhAkEAhafl5QEOIgUz+XazS/IlZ8goNKdDVfYgK3mHHjvv -nY5G+AuGebdNkXJr4KSWxDcN+C2i47zuj4QXA16MAOandA== ------END RSA PRIVATE KEY----- -subject=/C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -issuer= /C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -notBefore=950413210656Z -notAfter =970412210656Z ------BEGIN X509 CERTIFICATE----- - -MIICCDCCAXECAQAwDQYJKoZIhvcNAQEEBQAwTjELMAkGA1UEBhMCVVMxHzAdBgNV -BAoUFkFUJlQgQmVsbCBMYWJvcmF0b3JpZXMxHjAcBgNVBAsUFVByb3RvdHlwZSBS -ZXNlYXJjaCBDQTAeFw05NTA0MTMyMTA2NTZaFw05NzA0MTIyMTA2NTZaME4xCzAJ -BgNVBAYTAlVTMR8wHQYDVQQKFBZBVCZUIEJlbGwgTGFib3JhdG9yaWVzMR4wHAYD -VQQLFBVQcm90b3R5cGUgUmVzZWFyY2ggQ0EwgZwwDQYJKoZIhvcNAQEBBQADgYoA -MIGGAoGAebOmgtSCl+wCYZc86UGYeTLY8cjmW2P0FN8ToT/u2pECCoFdrlycX0OR -3wt0ZhpFXLVNeDnHwEE9veNUih7pCL2ZBFqoIoQkB1lZmXRiVtjGonz8BLm/qrFM -YHb0lme/Ol+s118mwKVxnn6bSAeI/OXKhLaVdYZWk+aEaxEDkVkCAQ8wDQYJKoZI -hvcNAQEEBQADgYEAAZMG14lZmZ8bahkaHaTV9dQf4p2FZiQTFwHP9ZyGsXPC+LT5 -dG5iTaRmyjNIJdPWohZDl97kAci79aBndvuEvRKOjLHs3WRGBIwERnAcnY9Mz8u/ -zIHK23PjYVxGGaZd669OJwD0CYyqH22HH9nFUGaoJdsv39ChW0NRdLE9+y8= ------END X509 CERTIFICATE----- -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJjCCAY8CAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTc0M1oXDTAxMDYw -OTEzNTc0M1owWzELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYDVQQDExJUZXN0IENBICgxMDI0 -IGJpdCkwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAKO7o8t116VP6cgybTsZ -DCZhr95nYlZuya3aCi1IKoztqwWnjbmDFIriOqGFPrZQ+moMETC9D59iRW/dFXSv -1F65ka/XY2hLh9exCCo7XuUcDs53Qp3bI3AmMqHjgzE8oO3ajyJAzJkTTOUecQU2 -mw/gI4tMM0LqWMQS7luTy4+xAgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAM7achv3v -hLQJcv/65eGEpBXM40ZDVoFQFFJWaY5p883HTqLB1x4FdzsXHH0QKBTcKpWwqyu4 -YDm3fb8oDugw72bCzfyZK/zVZPR/hVlqI/fvU109Qoc+7oPvIXWky71HfcK6ZBCA -q30KIqGM/uoM60INq97qjDmCJapagcNBGQs= ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIICXQIBAAKBgQCju6PLddelT+nIMm07GQwmYa/eZ2JWbsmt2gotSCqM7asFp425 -gxSK4jqhhT62UPpqDBEwvQ+fYkVv3RV0r9ReuZGv12NoS4fXsQgqO17lHA7Od0Kd -2yNwJjKh44MxPKDt2o8iQMyZE0zlHnEFNpsP4COLTDNC6ljEEu5bk8uPsQIDAQAB -AoGAVZmpFZsDZfr0l2S9tLLwpjRWNOlKATQkno6q2WesT0eGLQufTciY+c8ypfU6 -hyio8r5iUl/VhhdjhAtKx1mRpiotftHo/eYf8rtsrnprOnWG0bWjLjtIoMbcxGn2 -J3bN6LJmbJMjDs0eJ3KnTu646F3nDUw2oGAwmpzKXA1KAP0CQQDRvQhxk2D3Pehs -HvG665u2pB5ipYQngEFlZO7RHJZzJOZEWSLuuMqaF/7pTfA5jiBvWqCgJeCRRInL -21ru4dlPAkEAx9jj7BgKn5TYnMoBSSe0afjsV9oApVpN1Nacb1YDtCwy+scp3++s -nFxlv98wxIlSdpwMUn+AUWfjiWR7Tu/G/wJBAJ/KjwZIrFVxewP0x2ILYsTRYLzz -MS4PDsO7FB+I0i7DbBOifXS2oNSpd3I0CNMwrxFnUHzynpbOStVfN3ZL5w0CQQCa -pwFahxBRhkJKsxhjoFJBX9yl75JoY4Wvm5Tbo9ih6UJaRx3kqfkN14L2BKYcsZgb -KY9vmDOYy6iNfjDeWTfJAkBkfPUb8oTJ/nSP5zN6sqGxSY4krc4xLxpRmxoJ8HL2 -XfhqXkTzbU13RX9JJ/NZ8vQN9Vm2NhxRGJocQkmcdVtJ ------END RSA PRIVATE KEY----- ------BEGIN X509 CERTIFICATE----- -MIICYDCCAiACAgEoMAkGBSsOAwINBQAwfDELMAkGA1UEBhMCVVMxNjA0BgNVBAoT -LU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFuZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZ -MBcGA1UECxMQVGVzdCBFbnZpcm9ubWVudDEaMBgGA1UECxMRRFNTLU5BU0EtUGls -b3QtQ0EwHhcNOTYwMjI2MTYzMjQ1WhcNOTcwMjI1MTYzMjQ1WjB8MQswCQYDVQQG -EwJVUzE2MDQGA1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFk -bWluaXN0cmF0aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MRowGAYDVQQL -ExFEU1MtTkFTQS1QaWxvdC1DQTCB8jAJBgUrDgMCDAUAA4HkADCB4AJBAMA/ssKb -hPNUG7ZlASfVwEJU21O5OyF/iyBzgHI1O8eOhJGUYO8cc8wDMjR508Mr9cp6Uhl/ -ZB7FV5GkLNEnRHYCQQDUEaSg45P2qrDwixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLb -bn3QK74T2IxY1yY+kCNq8XrIqf5fJJzIH0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3 -fVd0geUCQQCzCFUQAh+ZkEmp5804cs6ZWBhrUAfnra8lJItYo9xPcXgdIfLfibcX -R71UsyO77MRD7B0+Ag2tq794IleCVcEEMAkGBSsOAwINBQADLwAwLAIUUayDfreR -Yh2WeU86/pHNdkUC1IgCFEfxe1f0oMpxJyrJ5XIxTi7vGdoK ------END X509 CERTIFICATE----- ------BEGIN X509 CERTIFICATE----- - -MIICGTCCAdgCAwCqTDAJBgUrDgMCDQUAMHwxCzAJBgNVBAYTAlVTMTYwNAYDVQQK -Ey1OYXRpb25hbCBBZXJvbmF1dGljcyBhbmQgU3BhY2UgQWRtaW5pc3RyYXRpb24x -GTAXBgNVBAsTEFRlc3QgRW52aXJvbm1lbnQxGjAYBgNVBAsTEURTUy1OQVNBLVBp -bG90LUNBMB4XDTk2MDUxNDE3MDE0MVoXDTk3MDUxNDE3MDE0MVowMzELMAkGA1UE -BhMCQVUxDzANBgNVBAoTBk1pbmNvbTETMBEGA1UEAxMKRXJpYyBZb3VuZzCB8jAJ -BgUrDgMCDAUAA4HkADCB4AJBAKbfHz6vE6pXXMTpswtGUec2tvnfLJUsoxE9qs4+ -ObZX7LmLvragNPUeiTJx7UOWZ5DfBj6bXLc8eYne0lP1g3ACQQDUEaSg45P2qrDw -ixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLbbn3QK74T2IxY1yY+kCNq8XrIqf5fJJzI -H0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3fVd0geUCQQCzCFUQAh+ZkEmp5804cs6Z -WBhrUAfnra8lJItYo9xPcXgdIfLfibcXR71UsyO77MRD7B0+Ag2tq794IleCVcEE -MAkGBSsOAwINBQADMAAwLQIUWsuuJRE3VT4ueWkWMAJMJaZjj1ECFQCYY0zX4bzM -LC7obsrHD8XAHG+ZRG== ------END X509 CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICTTCCAbagAwIBAgIBADANBgkqhkiG9w0BAQQFADBMMQswCQYDVQQGEwJHQjEM -MAoGA1UEChMDVUNMMRgwFgYDVQQLEw9JQ0UtVEVMIFByb2plY3QxFTATBgNVBAMT -DFRydXN0RmFjdG9yeTAeFw05NzA0MjIxNDM5MTRaFw05ODA0MjIxNDM5MTRaMEwx -CzAJBgNVBAYTAkdCMQwwCgYDVQQKEwNVQ0wxGDAWBgNVBAsTD0lDRS1URUwgUHJv -amVjdDEVMBMGA1UEAxMMVHJ1c3RGYWN0b3J5MIGcMAoGBFUIAQECAgQAA4GNADCB -iQKBgQCEieR8NcXkUW1f0G6aC6u0i8q/98JqS6RxK5YmHIGKCkuTWAUjzLfUa4dt -U9igGCjTuxaDqlzEim+t/02pmiBZT9HaX++35MjQPUWmsChcYU5WyzGErXi+rQaw -zlwS73zM8qiPj/97lXYycWhgL0VaiDSPxRXEUdWoaGruom4mNQIDAQABo0IwQDAd -BgNVHQ4EFgQUHal1LZr7oVg5z6lYzrhTgZRCmcUwDgYDVR0PAQH/BAQDAgH2MA8G -A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAfaggfl6FZoioecjv0dq8 -/DXo/u11iMZvXn08gjX/zl2b4wtPbShOSY5FhkSm8GeySasz+/Nwb/uzfnIhokWi -lfPZHtlCWtXbIy/TN51eJyq04ceDCQDWvLC2enVg9KB+GJ34b5c5VaPRzq8MBxsA -S7ELuYGtmYgYm9NZOIr7yU0= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIIB6jCCAZQCAgEtMA0GCSqGSIb3DQEBBAUAMIGAMQswCQYDVQQGEwJVUzE2MDQG -A1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFkbWluaXN0cmF0 -aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MR4wHAYDVQQLExVNRDUtUlNB -LU5BU0EtUGlsb3QtQ0EwHhcNOTYwNDMwMjIwNTAwWhcNOTcwNDMwMjIwNTAwWjCB -gDELMAkGA1UEBhMCVVMxNjA0BgNVBAoTLU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFu -ZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZMBcGA1UECxMQVGVzdCBFbnZpcm9ubWVu -dDEeMBwGA1UECxMVTUQ1LVJTQS1OQVNBLVBpbG90LUNBMFkwCgYEVQgBAQICAgAD -SwAwSAJBALmmX5+GqAvcrWK13rfDrNX9UfeA7f+ijyBgeFQjYUoDpFqapw4nzQBL -bAXug8pKkRwa2Zh8YODhXsRWu2F/UckCAwEAATANBgkqhkiG9w0BAQQFAANBAH9a -OBA+QCsjxXgnSqHx04gcU8S49DVUb1f2XVoLnHlIb8RnX0k5O6mpHT5eti9bLkiW -GJNMJ4L0AJ/ac+SmHZc= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICajCCAdMCBDGA0QUwDQYJKoZIhvcNAQEEBQAwfTELMAkGA1UEBhMCQ2ExDzAN -BgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmlsaXR5IEFjY2VwdGVkMR8w -HQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRwwGgYDVQQDExNFbnRydXN0 -IERlbW8gV2ViIENBMB4XDTk2MDQyNjEzMzUwMVoXDTA2MDQyNjEzMzUwMVowfTEL -MAkGA1UEBhMCQ2ExDzANBgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmls -aXR5IEFjY2VwdGVkMR8wHQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRww -GgYDVQQDExNFbnRydXN0IERlbW8gV2ViIENBMIGdMA0GCSqGSIb3DQEBAQUAA4GL -ADCBhwKBgQCaroS7O1DA0hm4IefNYU1cx/nqOmzEnk291d1XqznDeF4wEgakbkCc -zTKxK791yNpXG5RmngqH7cygDRTHZJ6mfCRn0wGC+AI00F2vYTGqPGRQL1N3lZT0 -YDKFC0SQeMMjFIZ1aeQigroFQnHo0VB3zWIMpNkka8PY9lxHZAmWwQIBAzANBgkq -hkiG9w0BAQQFAAOBgQBAx0UMVA1s54lMQyXjMX5kj99FJN5itb8bK1Rk+cegPQPF -cWO9SEWyEjjBjIkjjzAwBkaEszFsNGxemxtXvwjIm1xEUMTVlPEWTs2qnDvAUA9W -YqhWbhH0toGT36236QAsqCZ76rbTRVSSX2BHyJwJMG2tCRv7kRJ//NIgxj3H4w== ------END CERTIFICATE----- - -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJzCCAZACAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTczN1oXDTAxMDYw -OTEzNTczN1owXDELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYDVQQDExNUZXN0IFBDQSAoMTAy -NCBiaXQpMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCdoWk/3+WcMlfjIrkg -40ketmnQaEogQe1LLcuOJV6rKfUSAsPgwgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp -22Jp85PmemiDzyUIStwk72qhp1imbANZvlmlCFKiQrjUyuDfu4TABmn+kkt3vR1Y -BEOGt+IFye1UBVSATVdRJ2UVhwIDAQABMA0GCSqGSIb3DQEBBAUAA4GBABNA1u/S -Cg/LJZWb7GliiKJsvuhxlE4E5JxQF2zMub/CSNbF97//tYSyj96sxeFQxZXbcjm9 -xt6mr/xNLA4szNQMJ4P+L7b5e/jC5DSqlwS+CUYJgaFs/SP+qJoCSu1bR3IM9XWO -cRBpDmcBbYLkSyB92WURvsZ1LtjEcn+cdQVI +subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert #2 +issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA +-----BEGIN CERTIFICATE----- +MIID6jCCAtKgAwIBAgIJALnu1NlVpZ60MA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT +VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt +ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZzELMAkG +A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU +RVNUSU5HIFBVUlBPU0VTIE9OTFkxHDAaBgNVBAMME1Rlc3QgU2VydmVyIENlcnQg +IzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDrdi7j9yctG+L4EjBy +gjPmEqZzOJEQba26MoQGzglU7e5Xf59Rb/hgVQuKAoiZe7/R8rK4zJ4W7iXdXw0L +qBpyG8B5aGKeI32w+A9TcBApoXXL2CrYQEQjZwUIpLlYBIi2NkJj3nVkq5dgl1gO +ALiQ+W8jg3kzg5Ec9rimp9r93N8wsSL3awsafurmYCvOf7leHaMP1WJ/zDRGUNHG +/WtDjXc8ZUG1+6EXU9Jc2Fs+2Omf7fcN0l00AK/wPg8OaNS0rKyGq9JdIT9FRGV1 +bXe/rx58FaE5CItdwCSYhJvF/O95LWQoxJXye5bCFLmvDTEyVq9FMSCptfsmbXjE +ZGsXAgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJ +YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud +DgQWBBR52UaWWTKzZGDH/X4mWNcuqeQVazAfBgNVHSMEGDAWgBQ2w2yI55X+sL3s +zj49hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEANBW+XYLlHBqVY/31ie+3gRlS +LPfy4SIqn0t3RJjagT29MXprblBO2cbMO8VGjkQdKGpmMXjxbht2arOOUXRHX4n/ +XTyn/QHEf0bcwIITMReO3DZUPAEw8hSjn9xEOM0IRVOCP+mH5fi74QzzQaZVCyYg +5VtLKdww/+sc0nCbKl2KWgDluriH0nfVx95qgW3mg9dhXRr0zmf1w2zkBHYpARYL +Dew6Z8EE4tS3HJu8/qM6meWzNtrfonQ3eiiMxjZBxzV46jchBwa2z9XYhP6AmpPb +oeTSzcQNbWsxaGYzWo46oLDUZmJOwSBawbS31bZNMCoPIY6ukoesCzFSsUKZww== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQCdoWk/3+WcMlfjIrkg40ketmnQaEogQe1LLcuOJV6rKfUSAsPg -wgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp22Jp85PmemiDzyUIStwk72qhp1imbANZ -vlmlCFKiQrjUyuDfu4TABmn+kkt3vR1YBEOGt+IFye1UBVSATVdRJ2UVhwIDAQAB -AoGAba4fTtuap5l7/8ZsbE7Z1O32KJY4ZcOZukLOLUUhXxXduT+FTgGWujc0/rgc -z9qYCLlNZHOouMYTgtSfYvuMuLZ11VIt0GYH+nRioLShE59Yy+zCRyC+gPigS1kz -xvo14AsOIPYV14Tk/SsHyq6E0eTk7VzaIE197giiINUERPECQQDSKmtPTh/lRKw7 -HSZSM0I1mFWn/1zqrAbontRQY5w98QWIOe5qmzYyFbPXYT3d9BzlsMyhgiRNoBbD -yvohSHXJAkEAwAHx6ezAZeWWzD5yXD36nyjpkVCw7Tk7TSmOceLJMWt1QcrCfqlS -xA5jjpQ6Z8suU5DdtWAryM2sAir1WisYzwJAd6Zcx56jvAQ3xcPXsE6scBTVFzrj -7FqZ6E+cclPzfLQ+QQsyOBE7bpI6e/FJppY26XGZXo3YGzV8IGXrt40oOQJALETG -h86EFXo3qGOFbmsDy4pdP5nBERCu8X1xUCSfintiD4c2DInxgS5oGclnJeMcjTvL -QjQoJCX3UJCi/OUO1QJBAKgcDHWjMvt+l1pjJBsSEZ0HX9AAIIVx0RQmbFGS+F2Q -hhu5l77WnnZOQ9vvhV5u7NPCUF9nhU3jh60qWWO8mkc= +MIIEowIBAAKCAQEA63Yu4/cnLRvi+BIwcoIz5hKmcziREG2tujKEBs4JVO3uV3+f +UW/4YFULigKImXu/0fKyuMyeFu4l3V8NC6gachvAeWhiniN9sPgPU3AQKaF1y9gq +2EBEI2cFCKS5WASItjZCY951ZKuXYJdYDgC4kPlvI4N5M4ORHPa4pqfa/dzfMLEi +92sLGn7q5mArzn+5Xh2jD9Vif8w0RlDRxv1rQ413PGVBtfuhF1PSXNhbPtjpn+33 +DdJdNACv8D4PDmjUtKyshqvSXSE/RURldW13v68efBWhOQiLXcAkmISbxfzveS1k +KMSV8nuWwhS5rw0xMlavRTEgqbX7Jm14xGRrFwIDAQABAoIBAHLsTPihIfLnYIE5 +x4GsQQ5zXeBw5ITDM37ktwHnQDC+rIzyUl1aLD1AZRBoKinXd4lOTqLZ4/NHKx4A +DYr58mZtWyUmqLOMmQVuHXTZBlp7XtYuXMMNovQwjQlp9LicBeoBU6gQ5PVMtubD +F4xGF89Sn0cTHW3iMkqTtQ5KcR1j57OcJO0FEb1vPvk2MXI5ZyAatUYE7YacbEzd +rg02uIwx3FqNSkuSI79uz4hMdV5TPtuhxx9nTwj9aLUhXFeZ0mn2PVgVzEnnMoJb ++znlsZDgzDlJqdaD744YGWh8Z3OEssB35KfzFcdOeO6yH8lmv2Zfznk7pNPT7LTb +Lae9VgkCgYEA92p1qnAB3NtJtNcaW53i0S5WJgS1hxWKvUDx3lTB9s8X9fHpqL1a +E94fDfWzp/hax6FefUKIvBOukPLQ6bYjTMiFoOHzVirghAIuIUoMI5VtLhwD1hKs +Lr7l/dptMgKb1nZHyXoKHRBthsy3K4+udsPi8TzMvYElgEqyQIe/Rk0CgYEA86GL +8HC6zLszzKERDPBxrboRmoFvVUCTQDhsfj1M8aR3nQ8V5LkdIJc7Wqm/Ggfk9QRf +rJ8M2WUMlU5CNnCn/KCrKzCNZIReze3fV+HnKdbcXGLvgbHPrhnz8yYehUFG+RGq +bVyDWRU94T38izy2s5qMYrMJWZEYyXncSPbfcPMCgYAtaXfxcZ+V5xYPQFARMtiX +5nZfggvDoJuXgx0h3tK/N2HBfcaSdzbaYLG4gTmZggc/jwnl2dl5E++9oSPhUdIG +3ONSFUbxsOsGr9PBvnKd8WZZyUCXAVRjPBzAzF+whzQNWCZy/5htnz9LN7YDI9s0 +5113Q96cheDZPFydZY0hHQKBgQDVbEhNukM5xCiNcu+f2SaMnLp9EjQ4h5g3IvaP +5B16daw/Dw8LzcohWboqIxeAsze0GD/D1ZUJAEd0qBjC3g+a9BjefervCjKOzXng +38mEUm+6EwVjJSQcjSmycEs+Sr/kwr/8i5WYvU32+jk4tFgMoC+o6tQe/Uesf68k +z/dPVwKBgGbF7Vv1/3SmhlOy+zYyvJ0CrWtKxH9QP6tLIEgEpd8x7YTSuCH94yok +kToMXYA3sWNPt22GbRDZ+rcp4c7HkDx6I6vpdP9aQEwJTp0EPy0sgWr2XwYmreIQ +NFmkk8Itn9EY2R9VBaP7GLv5kvwxDdLAnmwGmzVtbmaVdxCaBwUk -----END RSA PRIVATE KEY----- -subject=/C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -notBefore=941104185834Z -notAfter =991103185834Z ------BEGIN X509 CERTIFICATE----- - -MIICIzCCAZACBQJBAAAWMA0GCSqGSIb3DQEBAgUAMFwxCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVy -Y2lhbCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDQxODU4MzRaFw05 -OTExMDMxODU4MzRaMFwxCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0YSBT -ZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVyY2lhbCBDZXJ0aWZpY2F0aW9u -IEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCk+4Fie84QJ93o -975sbsZwmdu41QUDaSiCnHJ/lj+O7Kwpkj+KFPhCdr69XQO5kNTQvAayUTNfxMK/ -touPmbZiImDd298ggrTKoi8tUO2UMt7gVY3UaOLgTNLNBRYulWZcYVI4HlGogqHE -7yXpCuaLK44xZtn42f29O2nZ6wIDAQABMA0GCSqGSIb3DQEBAgUAA34AdrW2EP4j -9/dZYkuwX5zBaLxJu7NJbyFHXSudVMQAKD+YufKKg5tgf+tQx6sFEC097TgCwaVI -0v5loMC86qYjFmZsGySp8+x5NRhPJsjjr1BKx6cxa9B8GJ1Qv6km+iYrRpwUqbtb -MJhCKLVLU7tDCZJAuqiqWqTGtotXTcU= ------END X509 CERTIFICATE----- -subject=/C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -notBefore=941109235417Z -notAfter =991231235417Z ------BEGIN X509 CERTIFICATE----- - -MIICKTCCAZYCBQJBAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJl -IFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDkyMzU0MTda -Fw05OTEyMzEyMzU0MTdaMF8xCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0 -YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJlIFNlcnZlciBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCSznrB -roM+WqqJg1esJQF2DK2ujiw3zus1eGRUA+WEQFHJv48I4oqCCNIWhjdV6bEhAq12 -aIGaBaJLyUslZiJWbIgHj/eBWW2EB2VwE3F2Ppt3TONQiVaYSLkdpykaEy5KEVmc -HhXVSVQsczppgrGXOZxtcGdI5d0t1sgeewIDAQABMA0GCSqGSIb3DQEBAgUAA34A -iNHReSHO4ovo+MF9NFM/YYPZtgs4F7boviGNjwC4i1N+RGceIr2XJ+CchcxK9oU7 -suK+ktPlDemvXA4MRpX/oRxePug2WHpzpgr4IhFrwwk4fia7c+8AvQKk8xQNMD9h -cHsg/jKjn7P0Z1LctO6EjJY2IN6BCINxIYoPnqk= ------END X509 CERTIFICATE----- -subject=/C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com -issuer= /C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com ------BEGIN CERTIFICATE----- -MIIC+TCCAmICAQAwDQYJKoZIhvcNAQEEBQAwgcQxCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkq -hkiG9w0BCQEWF3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMB4XDTk2MDcyNzE4MDc1 -N1oXDTk4MDcyNzE4MDc1N1owgcQxCzAJBgNVBAYTAlpBMRUwEwYDVQQIEwxXZXN0 -ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMUVGhhd3RlIENv -bnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2VydmljZXMgRGl2 -aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkqhkiG9w0BCQEW -F3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB -iQKBgQDTpFBuyP9Wa+bPXbbqDGh1R6KqwtqEJfyo9EdR2oW1IHSUhh4PdcnpCGH1 -Bm0wbhUZAulSwGLbTZme4moMRDjN/r7jZAlwxf6xaym2L0nIO9QnBCUQly/nkG3A -KEKZ10xD3sP1IW1Un13DWOHA5NlbsLjctHvfNjrCtWYiEtaHDQIDAQABMA0GCSqG -SIb3DQEBBAUAA4GBAIsvn7ifX3RUIrvYXtpI4DOfARkTogwm6o7OwVdl93yFhDcX -7h5t0XZ11MUAMziKdde3rmTvzUYIUCYoY5b032IwGMTvdiclK+STN6NP2m5nvFAM -qJT5gC5O+j/jBuZRQ4i0AMYQr5F4lT8oBJnhgafw6PL8aDY2vMHGSPl9+7uf ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIIDDTCCAnYCAQAwDQYJKoZIhvcNAQEEBQAwgc4xCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNlcnZlciBD -QTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNvbTAeFw05 -NjA3MjcxODA3MTRaFw05ODA3MjcxODA3MTRaMIHOMQswCQYDVQQGEwJaQTEVMBMG -A1UECBMMV2VzdGVybiBDYXBlMRIwEAYDVQQHEwlDYXBlIFRvd24xHTAbBgNVBAoT -FFRoYXd0ZSBDb25zdWx0aW5nIGNjMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9uIFNl -cnZpY2VzIERpdmlzaW9uMSEwHwYDVQQDExhUaGF3dGUgUHJlbWl1bSBTZXJ2ZXIg -Q0ExKDAmBgkqhkiG9w0BCQEWGXByZW1pdW0tc2VydmVyQHRoYXd0ZS5jb20wgZ8w -DQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANI2NmqL18JbntqBQWKPOO5JBFXW0O8c -G5UWR+8YSDU6UvQragaPOy/qVuOvho2eF/eetGV1Ak3vywmiIVHYm9Bn0LoNkgYU -c9STy5cqAJxcTgy8+hVS/PJEbtoRSm4Iny8t4/mqOoZztkZTWMiJBb2DEbhzP6oH -jfRCTedAnRw3AgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAutFIgTRZVYerIZfL9lvR -w9Eifvvo5KTZ3h+Bj+VzNnyw4Qc/IyXkPOu6SIiH9LQ3sCmWBdxpe+qr4l77rLj2 -GYuMtESFfn1XVALzkYgC7JcPuTOjMfIiMByt+uFf8AV8x0IW/Qkuv+hEQcyM9vxK -3VZdLbCVIhNoEsysrxCpxcI= ------END CERTIFICATE----- -Tims test GCI CA - ------BEGIN CERTIFICATE----- -MIIB8DCCAZoCAQAwDQYJKoZIhvcNAQEEBQAwgYIxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2RldmVsb3BtZW50MRkwFwYDVQQDExBD -cnlwdFNvZnQgRGV2IENBMB4XDTk3MDMyMjEzMzQwNFoXDTk4MDMyMjEzMzQwNFow -gYIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhC -cmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2Rl -dmVsb3BtZW50MRkwFwYDVQQDExBDcnlwdFNvZnQgRGV2IENBMFwwDQYJKoZIhvcN -AQEBBQADSwAwSAJBAOAOAqogG5QwAmLhzyO4CoRnx/wVy4NZP4dxJy83O1EnL0rw -OdsamJKvPOLHgSXo3gDu9uVyvCf/QJmZAmC5ml8CAwEAATANBgkqhkiG9w0BAQQF -AANBADRRS/GVdd7rAqRW6SdmgLJduOU2yq3avBu99kRqbp9A/dLu6r6jU+eP4oOA -TfdbFZtAAD2Hx9jUtY3tfdrJOb8= ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIICVjCCAgACAQAwDQYJKoZIhvcNAQEEBQAwgbUxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsTI1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9O -IEFVVEhPUklUSUVTMTQwMgYDVQQDEytaRVJPIFZBTFVFIENBIC0gREVNT05TVFJB -VElPTiBQVVJQT1NFUyBPTkxZMB4XDTk3MDQwMzEzMjI1NFoXDTk4MDQwMzEzMjI1 -NFowgbUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsT -I1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9OIEFVVEhPUklUSUVTMTQwMgYDVQQDEyta -RVJPIFZBTFVFIENBIC0gREVNT05TVFJBVElPTiBQVVJQT1NFUyBPTkxZMFwwDQYJ -KoZIhvcNAQEBBQADSwAwSAJBAOZ7T7yqP/tyspcko3yPY1y0Cm2EmwNvzW4QgVXR -Fjs3HmJ4xtSpXdo6mwcGezL3Abt/aQXaxv9PU8xt+Jr0OFUCAwEAATANBgkqhkiG -9w0BAQQFAANBAOQpYmGgyCqCy1OljgJhCqQOu627oVlHzK1L+t9vBaMfn40AVUR4 -WzQVWO31KTgi5vTK1U+3h46fgUWqQ0h+6rU= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIAwgKADAgECAgEAMA0GCSqGSIb3DQEBBAUAMGIxETAPBgNVBAcTCEludGVybmV0 -MRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xh -c3MgMSBDQSAtIEluZGl2aWR1YWwgU3Vic2NyaWJlcjAeFw05NjA0MDgxMDIwMjda -Fw05NzA0MDgxMDIwMjdaMGIxETAPBgNVBAcTCEludGVybmV0MRcwFQYDVQQKEw5W -ZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xhc3MgMSBDQSAtIElu -ZGl2aWR1YWwgU3Vic2NyaWJlcjCAMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC2 -FKbPTdAFDdjKI9BvqrQpkmOOLPhvltcunXZLEbE2jVfJw/0cxrr+Hgi6M8qV6r7j -W80GqLd5HUQq7XPysVKDaBBwZJHXPmv5912dFEObbpdFmIFH0S3L3bty10w/cari -QPJUObwW7s987LrbP2wqsxaxhhKdrpM01bjV0Pc+qQIDAQABAAAAADANBgkqhkiG -9w0BAQQFAAOBgQA+1nJryNt8VBRjRr07ArDAV/3jAH7GjDc9jsrxZS68ost9v06C -TvTNKGL+LISNmFLXl+JXhgGB0JZ9fvyYzNgHQ46HBUng1H6voalfJgS2KdEo50wW -8EFZYMDkT1k4uynwJqkVN2QJK/2q4/A/VCov5h6SlM8Affg2W+1TLqvqkwAA ------END CERTIFICATE----- - - subject=/L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - issuer= /L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - ------BEGIN CERTIFICATE----- -MIIEkzCCA/ygAwIBAgIRANDTUpSRL3nTFeMrMayFSPAwDQYJKoZIhvcNAQECBQAw -YjERMA8GA1UEBxMISW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQw -MgYDVQQLEytWZXJpU2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3Jp -YmVyMB4XDTk2MDYwNDAwMDAwMFoXDTk4MDYwNDIzNTk1OVowYjERMA8GA1UEBxMI -SW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJp -U2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3JpYmVyMIGfMA0GCSqG -SIb3DQEBAQUAA4GNADCBiQKBgQC6A+2czKGRcYMfm8gdnk+0de99TDDzsqo0v5nb -RsbUmMcdRQ7nsMbRWe0SAb/9QoLTZ/cJ0iOBqdrkz7UpqqKarVoTSdlSMVM92tWp -3bJncZHQD1t4xd6lQVdI1/T6R+5J0T1ukOdsI9Jmf+F28S6g3R3L1SFwiHKeZKZv -z+793wIDAQABo4ICRzCCAkMwggIpBgNVHQMBAf8EggIdMIICGTCCAhUwggIRBgtg -hkgBhvhFAQcBATCCAgAWggGrVGhpcyBjZXJ0aWZpY2F0ZSBpbmNvcnBvcmF0ZXMg -YnkgcmVmZXJlbmNlLCBhbmQgaXRzIHVzZSBpcyBzdHJpY3RseSBzdWJqZWN0IHRv -LCB0aGUgVmVyaVNpZ24gQ2VydGlmaWNhdGlvbiBQcmFjdGljZSBTdGF0ZW1lbnQg -KENQUyksIGF2YWlsYWJsZSBhdDogaHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL0NQ -Uy0xLjA7IGJ5IEUtbWFpbCBhdCBDUFMtcmVxdWVzdHNAdmVyaXNpZ24uY29tOyBv -ciBieSBtYWlsIGF0IFZlcmlTaWduLCBJbmMuLCAyNTkzIENvYXN0IEF2ZS4sIE1v -dW50YWluIFZpZXcsIENBIDk0MDQzIFVTQSBUZWwuICsxICg0MTUpIDk2MS04ODMw -IENvcHlyaWdodCAoYykgMTk5NiBWZXJpU2lnbiwgSW5jLiAgQWxsIFJpZ2h0cyBS -ZXNlcnZlZC4gQ0VSVEFJTiBXQVJSQU5USUVTIERJU0NMQUlNRUQgYW5kIExJQUJJ -TElUWSBMSU1JVEVELqAOBgxghkgBhvhFAQcBAQGhDgYMYIZIAYb4RQEHAQECMC8w -LRYraHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL3JlcG9zaXRvcnkvQ1BTLTEuMDAU -BglghkgBhvhCAQEBAf8EBAMCAgQwDQYJKoZIhvcNAQECBQADgYEApRJRkNBqLLgs -53IR/d18ODdLOWMTZ+QOOxBrq460iBEdUwgF8vmPRX1ku7UiDeNzaLlurE6eFqHq -2zPyK5j60zfTLVJMWKcQWwTJLjHtXrW8pxhNtFc6Fdvy5ZkHnC/9NIl7/t4U6WqB -p4y+p7SdMIkEwIZfds0VbnQyX5MRUJY= ------END CERTIFICATE----- - - subject=/C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKhAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAyVxZ -nvIbigEUtBDfBEDb41evakVAj4QMC9Ez2dkRz+4CWB8l9yqoRAWq7AMfeH+ek7ma -AKojfdashaJjRcdyJ8z0TMZ1cdI5709C8HXfCpDGjiBvmA/4rCNfcCk2pMmG57Ga -IMtTpYXnPb59mv4kRTPcdhXtD6JxZExlLoFoRacCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQB1Zmw+0c2B27X4LzZRtvdCvM1Cr9wO+hVs+GeTVzrrtpLotgHKjLeOQ7RJ -Zfk+7r11Ri7J/CVdqMcvi5uPaM+0nJcYwE3vH9mvgrPmZLiEXIqaB1JDYft0nls6 -NvxMsvwaPxUupVs8G5DsiCnkWRb5zget7Ond2tIxik/W2O8XjQ== ------END CERTIFICATE----- - subject=/C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKmAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0LJ1 -9njQrlpQ9OlQqZ+M1++RlHDo0iSQdomF1t+s5gEXMoDwnZNHvJplnR+Xrr/phnVj -IIm9gFidBAydqMEk6QvlMXi9/C0MN2qeeIDpRnX57aP7E3vIwUzSo+/1PLBij0pd -O92VZ48TucE81qcmm+zDO3rZTbxtm+gVAePwR6kCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQBT3dPwnCR+QKri/AAa19oM/DJhuBUNlvP6Vxt/M3yv6ZiaYch6s7f/sdyZ -g9ysEvxwyR84Qu1E9oAuW2szaayc01znX1oYx7EteQSWQZGZQbE8DbqEOcY7l/Am -yY7uvcxClf8exwI/VAx49byqYHwCaejcrOICdmHEPgPq0ook0Q== ------END CERTIFICATE----- diff --git a/app/openssl/android.testssl/testssl b/app/openssl/android.testssl/testssl index 3f24b1d0..5ff48604 100755 --- a/app/openssl/android.testssl/testssl +++ b/app/openssl/android.testssl/testssl @@ -70,15 +70,6 @@ $ssltest -client_auth $CA $extra || exit 1 echo test sslv2/sslv3 with both client and server authentication $ssltest -server_auth -client_auth $CA $extra || exit 1 -echo test sslv2/sslv3 with both client and server authentication and small client buffers -$ssltest -server_auth -client_auth -c_small_records $CA $extra || exit 1 - -echo test sslv2/sslv3 with both client and server authentication and small server buffers -$ssltest -server_auth -client_auth -s_small_records $CA $extra || exit 1 - -echo test sslv2/sslv3 with both client and server authentication and small client and server buffers -$ssltest -server_auth -client_auth -c_small_records -s_small_records $CA $extra || exit 1 - echo test sslv2/sslv3 with both client and server authentication and handshake cutthrough $ssltest -server_auth -client_auth -cutthrough $CA $extra || exit 1 @@ -112,8 +103,8 @@ echo test sslv2/sslv3 via BIO pair $ssltest $extra || exit 1 if [ $dsa_cert = NO ]; then - echo test sslv2/sslv3 w/o DHE via BIO pair - $ssltest -bio_pair -no_dhe $extra || exit 1 + echo 'test sslv2/sslv3 w/o (EC)DHE via BIO pair' + $ssltest -bio_pair -no_dhe -no_ecdhe $extra || exit 1 fi echo test sslv2/sslv3 with 1024bit DHE via BIO pair @@ -131,6 +122,23 @@ $ssltest -bio_pair -server_auth -client_auth $CA $extra || exit 1 echo test sslv2/sslv3 with both client and server authentication via BIO pair and app verify $ssltest -bio_pair -server_auth -client_auth -app_verify $CA $extra || exit 1 +echo "Testing ciphersuites" +for protocol in TLSv1.2 SSLv3; do + echo "Testing ciphersuites for $protocol" + for cipher in `adb shell /system/bin/openssl ciphers "RSA+$protocol" | tr ':' ' '`; do + echo "Testing $cipher" + prot="" + if [ $protocol = "SSLv3" ] ; then + prot="-ssl3" + fi + $ssltest -cipher $cipher $prot + if [ $? -ne 0 ] ; then + echo "Failed $cipher" + exit 1 + fi + done +done + ############################################################################# if [ `adb shell /system/bin/openssl no-dh` = no-dh ]; then @@ -143,8 +151,8 @@ fi if [ `adb shell /system/bin/openssl no-rsa` = no-dh ]; then echo skipping RSA tests else - echo test tls1 with 1024bit RSA, no DHE, multiple handshakes - adb shell /system/bin/ssltest -v -bio_pair -tls1 -cert /sdcard/android.testssl/server2.pem -no_dhe -num 10 -f -time $extra || exit 1 + echo 'test tls1 with 1024bit RSA, no (EC)DHE, multiple handshakes' + adb shell /system/bin/ssltest -v -bio_pair -tls1 -cert /sdcard/android.testssl/server2.pem -no_dhe -no_ecdhe -num 10 -f -time $extra || exit 1 if [ `adb shell /system/bin/openssl no-dh` = no-dh ]; then echo skipping RSA+DHE tests @@ -160,4 +168,14 @@ $ssltest -tls1 -cipher PSK -psk abc123 $extra || exit 1 echo test tls1 with PSK via BIO pair $ssltest -bio_pair -tls1 -cipher PSK -psk abc123 $extra || exit 1 +if adb shell /system/bin/openssl no-srp; then + echo skipping SRP tests +else + echo test tls1 with SRP + $ssltest -tls1 -cipher SRP -srpuser test -srppass abc123 + + echo test tls1 with SRP via BIO pair + $ssltest -bio_pair -tls1 -cipher SRP -srpuser test -srppass abc123 +fi + exit 0 diff --git a/app/openssl/apps/#Android.mk# b/app/openssl/apps/#Android.mk# deleted file mode 100644 index 1ad078de..00000000 --- a/app/openssl/apps/#Android.mk# +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2006 The Android Open Source Project - -LOCAL_PATH:= $(call my-dir) - -local_src_files:= \ - app_rand.c \ - apps.c \ - asn1pars.c \ - ca.c \ - ciphers.c \ - crl.c \ - crl2p7.c \ - dgst.c \ - dh.c \ - dhparam.c \ - dsa.c \ - dsaparam.c \ - ecparam.c \ - ec.c \ - enc.c \ - engine.c \ - errstr.c \ - gendh.c \ - gendsa.c \ - genpkey.c \ - genrsa.c \ - nseq.c \ - ocsp.c \ - openssl.c \ - passwd.c \ - pkcs12.c \ - pkcs7.c \ - pkcs8.c \ - pkey.c \ - pkeyparam.c \ - pkeyutl.c \ - prime.c \ - rand.c \ - req.c \ - rsa.c \ - rsautl.c \ - s_cb.c \ - s_client.c \ - s_server.c \ - s_socket.c \ - s_time.c \ - sess_id.c \ - smime.c \ - speed.c \ - spkac.c \ - verify.c \ - version.c \ - x509.c - -local_shared_libraries := \ - libssl \ - libcrypto - -local_c_includes := \ - openssl \ - openssl/include - -local_cflags := -DMONOLITH - -# These flags omit whole features from the commandline "openssl". -# However, portions of these features are actually turned on. -local_cflags += -DOPENSSL_NO_DTLS1 - -include $(CLEAR_VARS) -LOCAL_MODULE:= openssl -LOCAL_MODULE_TAGS := optional -LOCAL_SRC_FILES := $(local_src_files) -LOCAL_SHARED_LIBRARIES := $(local_shared_libraries) -LOCAL_C_INCLUDES := $(local_c_includes) -LOCAL_CFLAGS := $(local_cflags) -include $(LOCAL_PATH)/../android-config.mk -include $(BUILD_EXECUTABLE) - diff --git a/app/openssl/apps/Android.mk b/app/openssl/apps/Android.mk deleted file mode 100644 index 1ad078de..00000000 --- a/app/openssl/apps/Android.mk +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2006 The Android Open Source Project - -LOCAL_PATH:= $(call my-dir) - -local_src_files:= \ - app_rand.c \ - apps.c \ - asn1pars.c \ - ca.c \ - ciphers.c \ - crl.c \ - crl2p7.c \ - dgst.c \ - dh.c \ - dhparam.c \ - dsa.c \ - dsaparam.c \ - ecparam.c \ - ec.c \ - enc.c \ - engine.c \ - errstr.c \ - gendh.c \ - gendsa.c \ - genpkey.c \ - genrsa.c \ - nseq.c \ - ocsp.c \ - openssl.c \ - passwd.c \ - pkcs12.c \ - pkcs7.c \ - pkcs8.c \ - pkey.c \ - pkeyparam.c \ - pkeyutl.c \ - prime.c \ - rand.c \ - req.c \ - rsa.c \ - rsautl.c \ - s_cb.c \ - s_client.c \ - s_server.c \ - s_socket.c \ - s_time.c \ - sess_id.c \ - smime.c \ - speed.c \ - spkac.c \ - verify.c \ - version.c \ - x509.c - -local_shared_libraries := \ - libssl \ - libcrypto - -local_c_includes := \ - openssl \ - openssl/include - -local_cflags := -DMONOLITH - -# These flags omit whole features from the commandline "openssl". -# However, portions of these features are actually turned on. -local_cflags += -DOPENSSL_NO_DTLS1 - -include $(CLEAR_VARS) -LOCAL_MODULE:= openssl -LOCAL_MODULE_TAGS := optional -LOCAL_SRC_FILES := $(local_src_files) -LOCAL_SHARED_LIBRARIES := $(local_shared_libraries) -LOCAL_C_INCLUDES := $(local_c_includes) -LOCAL_CFLAGS := $(local_cflags) -include $(LOCAL_PATH)/../android-config.mk -include $(BUILD_EXECUTABLE) - diff --git a/app/openssl/apps/apps.c b/app/openssl/apps/apps.c index 38e61970..b76db10a 100644 --- a/app/openssl/apps/apps.c +++ b/app/openssl/apps/apps.c @@ -109,7 +109,7 @@ * */ -#ifndef _POSIX_C_SOURCE +#if !defined(_POSIX_C_SOURCE) && defined(OPENSSL_SYS_VMS) #define _POSIX_C_SOURCE 2 /* On VMS, you need to define this to get the declaration of fileno(). The value 2 is to make sure no function defined @@ -586,12 +586,12 @@ int password_callback(char *buf, int bufsiz, int verify, if (ok >= 0) ok = UI_add_input_string(ui,prompt,ui_flags,buf, - PW_MIN_LENGTH,BUFSIZ-1); + PW_MIN_LENGTH,bufsiz-1); if (ok >= 0 && verify) { buff = (char *)OPENSSL_malloc(bufsiz); ok = UI_add_verify_string(ui,prompt,ui_flags,buff, - PW_MIN_LENGTH,BUFSIZ-1, buf); + PW_MIN_LENGTH,bufsiz-1, buf); } if (ok >= 0) do @@ -1215,7 +1215,8 @@ STACK_OF(X509) *load_certs(BIO *err, const char *file, int format, const char *pass, ENGINE *e, const char *desc) { STACK_OF(X509) *certs; - load_certs_crls(err, file, format, pass, e, desc, &certs, NULL); + if (!load_certs_crls(err, file, format, pass, e, desc, &certs, NULL)) + return NULL; return certs; } @@ -1223,7 +1224,8 @@ STACK_OF(X509_CRL) *load_crls(BIO *err, const char *file, int format, const char *pass, ENGINE *e, const char *desc) { STACK_OF(X509_CRL) *crls; - load_certs_crls(err, file, format, pass, e, desc, NULL, &crls); + if (!load_certs_crls(err, file, format, pass, e, desc, NULL, &crls)) + return NULL; return crls; } @@ -2130,7 +2132,7 @@ X509_NAME *parse_name(char *subject, long chtype, int multirdn) X509_NAME *n = NULL; int nid; - if (!buf || !ne_types || !ne_values) + if (!buf || !ne_types || !ne_values || !mval) { BIO_printf(bio_err, "malloc error\n"); goto error; @@ -2234,6 +2236,7 @@ X509_NAME *parse_name(char *subject, long chtype, int multirdn) OPENSSL_free(ne_values); OPENSSL_free(ne_types); OPENSSL_free(buf); + OPENSSL_free(mval); return n; error: @@ -2242,6 +2245,8 @@ error: OPENSSL_free(ne_values); if (ne_types) OPENSSL_free(ne_types); + if (mval) + OPENSSL_free(mval); if (buf) OPENSSL_free(buf); return NULL; @@ -2256,6 +2261,7 @@ int args_verify(char ***pargs, int *pargc, int purpose = 0, depth = -1; char **oldargs = *pargs; char *arg = **pargs, *argn = (*pargs)[1]; + time_t at_time = 0; if (!strcmp(arg, "-policy")) { if (!argn) @@ -2308,6 +2314,27 @@ int args_verify(char ***pargs, int *pargc, } (*pargs)++; } + else if (strcmp(arg,"-attime") == 0) + { + if (!argn) + *badarg = 1; + else + { + long timestamp; + /* interpret the -attime argument as seconds since + * Epoch */ + if (sscanf(argn, "%li", ×tamp) != 1) + { + BIO_printf(bio_err, + "Error parsing timestamp %s\n", + argn); + *badarg = 1; + } + /* on some platforms time_t may be a float */ + at_time = (time_t) timestamp; + } + (*pargs)++; + } else if (!strcmp(arg, "-ignore_critical")) flags |= X509_V_FLAG_IGNORE_CRITICAL; else if (!strcmp(arg, "-issuer_checks")) @@ -2362,6 +2389,9 @@ int args_verify(char ***pargs, int *pargc, if (depth >= 0) X509_VERIFY_PARAM_set_depth(*pm, depth); + if (at_time) + X509_VERIFY_PARAM_set_time(*pm, at_time); + end: (*pargs)++; @@ -2693,6 +2723,50 @@ void jpake_server_auth(BIO *out, BIO *conn, const char *secret) #endif +#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +/* next_protos_parse parses a comma separated list of strings into a string + * in a format suitable for passing to SSL_CTX_set_next_protos_advertised. + * outlen: (output) set to the length of the resulting buffer on success. + * err: (maybe NULL) on failure, an error message line is written to this BIO. + * in: a NUL termianted string like "abc,def,ghi" + * + * returns: a malloced buffer or NULL on failure. + */ +unsigned char *next_protos_parse(unsigned short *outlen, const char *in) + { + size_t len; + unsigned char *out; + size_t i, start = 0; + + len = strlen(in); + if (len >= 65535) + return NULL; + + out = OPENSSL_malloc(strlen(in) + 1); + if (!out) + return NULL; + + for (i = 0; i <= len; ++i) + { + if (i == len || in[i] == ',') + { + if (i - start > 255) + { + OPENSSL_free(out); + return NULL; + } + out[start] = i - start; + start = i + 1; + } + else + out[i+1] = in[i]; + } + + *outlen = len + 1; + return out; + } +#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */ + /* * Platform-specific sections */ @@ -2767,7 +2841,7 @@ double app_tminterval(int stop,int usertime) if (proc==NULL) { - if (GetVersion() < 0x80000000) + if (check_winnt()) proc = OpenProcess(PROCESS_QUERY_INFORMATION,FALSE, GetCurrentProcessId()); if (proc==NULL) proc = (HANDLE)-1; @@ -3018,46 +3092,3 @@ int raw_write_stdout(const void *buf,int siz) int raw_write_stdout(const void *buf,int siz) { return write(fileno(stdout),buf,siz); } #endif - -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) -/* next_protos_parse parses a comma separated list of strings into a string - * in a format suitable for passing to SSL_CTX_set_next_protos_advertised. - * outlen: (output) set to the length of the resulting buffer on success. - * in: a NUL termianted string like "abc,def,ghi" - * - * returns: a malloced buffer or NULL on failure. - */ -unsigned char *next_protos_parse(unsigned short *outlen, const char *in) - { - size_t len; - unsigned char *out; - size_t i, start = 0; - - len = strlen(in); - if (len >= 65535) - return NULL; - - out = OPENSSL_malloc(strlen(in) + 1); - if (!out) - return NULL; - - for (i = 0; i <= len; ++i) - { - if (i == len || in[i] == ',') - { - if (i - start > 255) - { - OPENSSL_free(out); - return NULL; - } - out[start] = i - start; - start = i + 1; - } - else - out[i+1] = in[i]; - } - - *outlen = len + 1; - return out; - } -#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */ diff --git a/app/openssl/apps/apps.h b/app/openssl/apps/apps.h index 42072ec4..3aeb46c4 100644 --- a/app/openssl/apps/apps.h +++ b/app/openssl/apps/apps.h @@ -188,6 +188,7 @@ extern BIO *bio_err; do { CONF_modules_unload(1); destroy_ui_method(); \ OBJ_cleanup(); EVP_cleanup(); ENGINE_cleanup(); \ CRYPTO_cleanup_all_ex_data(); ERR_remove_thread_state(NULL); \ + RAND_cleanup(); \ ERR_free_strings(); zlib_cleanup();} while(0) # else # define apps_startup() \ @@ -198,6 +199,7 @@ extern BIO *bio_err; do { CONF_modules_unload(1); destroy_ui_method(); \ OBJ_cleanup(); EVP_cleanup(); \ CRYPTO_cleanup_all_ex_data(); ERR_remove_thread_state(NULL); \ + RAND_cleanup(); \ ERR_free_strings(); zlib_cleanup(); } while(0) # endif #endif @@ -317,6 +319,12 @@ int bio_to_mem(unsigned char **out, int maxlen, BIO *in); int pkey_ctrl_string(EVP_PKEY_CTX *ctx, char *value); int init_gen_str(BIO *err, EVP_PKEY_CTX **pctx, const char *algname, ENGINE *e, int do_param); +int do_X509_sign(BIO *err, X509 *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts); +int do_X509_REQ_sign(BIO *err, X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts); +int do_X509_CRL_sign(BIO *err, X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts); #ifndef OPENSSL_NO_PSK extern char *psk_key; #endif @@ -325,6 +333,10 @@ void jpake_client_auth(BIO *out, BIO *conn, const char *secret); void jpake_server_auth(BIO *out, BIO *conn, const char *secret); #endif +#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +unsigned char *next_protos_parse(unsigned short *outlen, const char *in); +#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */ + #define FORMAT_UNDEF 0 #define FORMAT_ASN1 1 #define FORMAT_TEXT 2 @@ -357,8 +369,7 @@ int raw_write_stdout(const void *,int); #define TM_START 0 #define TM_STOP 1 double app_tminterval (int stop,int usertime); -#endif -#ifndef OPENSSL_NO_NEXTPROTONEG -unsigned char *next_protos_parse(unsigned short *outlen, const char *in); +#define OPENSSL_NO_SSL_INTERN + #endif diff --git a/app/openssl/apps/ca.c b/app/openssl/apps/ca.c index 6b8b0ef8..1cf50e00 100644 --- a/app/openssl/apps/ca.c +++ b/app/openssl/apps/ca.c @@ -197,26 +197,30 @@ extern int EF_ALIGNMENT; static void lookup_fail(const char *name, const char *tag); static int certify(X509 **xret, char *infile,EVP_PKEY *pkey,X509 *x509, - const EVP_MD *dgst,STACK_OF(CONF_VALUE) *policy,CA_DB *db, + const EVP_MD *dgst,STACK_OF(OPENSSL_STRING) *sigopts, + STACK_OF(CONF_VALUE) *policy,CA_DB *db, BIGNUM *serial, char *subj,unsigned long chtype, int multirdn, int email_dn, char *startdate, char *enddate, long days, int batch, char *ext_sect, CONF *conf, int verbose, unsigned long certopt, unsigned long nameopt, int default_op, int ext_copy, int selfsign); static int certify_cert(X509 **xret, char *infile,EVP_PKEY *pkey,X509 *x509, - const EVP_MD *dgst,STACK_OF(CONF_VALUE) *policy, + const EVP_MD *dgst,STACK_OF(OPENSSL_STRING) *sigopts, + STACK_OF(CONF_VALUE) *policy, CA_DB *db, BIGNUM *serial, char *subj,unsigned long chtype, int multirdn, int email_dn, char *startdate, char *enddate, long days, int batch, char *ext_sect, CONF *conf,int verbose, unsigned long certopt, unsigned long nameopt, int default_op, int ext_copy, ENGINE *e); static int certify_spkac(X509 **xret, char *infile,EVP_PKEY *pkey,X509 *x509, - const EVP_MD *dgst,STACK_OF(CONF_VALUE) *policy, + const EVP_MD *dgst,STACK_OF(OPENSSL_STRING) *sigopts, + STACK_OF(CONF_VALUE) *policy, CA_DB *db, BIGNUM *serial,char *subj,unsigned long chtype, int multirdn, int email_dn, char *startdate, char *enddate, long days, char *ext_sect, CONF *conf, int verbose, unsigned long certopt, unsigned long nameopt, int default_op, int ext_copy); static void write_new_certificate(BIO *bp, X509 *x, int output_der, int notext); static int do_body(X509 **xret, EVP_PKEY *pkey, X509 *x509, const EVP_MD *dgst, + STACK_OF(OPENSSL_STRING) *sigopts, STACK_OF(CONF_VALUE) *policy, CA_DB *db, BIGNUM *serial,char *subj,unsigned long chtype, int multirdn, int email_dn, char *startdate, char *enddate, long days, int batch, int verbose, X509_REQ *req, char *ext_sect, CONF *conf, @@ -311,6 +315,7 @@ int MAIN(int argc, char **argv) const EVP_MD *dgst=NULL; STACK_OF(CONF_VALUE) *attribs=NULL; STACK_OF(X509) *cert_sk=NULL; + STACK_OF(OPENSSL_STRING) *sigopts = NULL; #undef BSIZE #define BSIZE 256 MS_STATIC char buf[3][BSIZE]; @@ -435,6 +440,15 @@ EF_ALIGNMENT=0; if (--argc < 1) goto bad; outdir= *(++argv); } + else if (strcmp(*argv,"-sigopt") == 0) + { + if (--argc < 1) + goto bad; + if (!sigopts) + sigopts = sk_OPENSSL_STRING_new_null(); + if (!sigopts || !sk_OPENSSL_STRING_push(sigopts, *(++argv))) + goto bad; + } else if (strcmp(*argv,"-notext") == 0) notext=1; else if (strcmp(*argv,"-batch") == 0) @@ -1170,8 +1184,9 @@ bad: if (spkac_file != NULL) { total++; - j=certify_spkac(&x,spkac_file,pkey,x509,dgst,attribs,db, - serial,subj,chtype,multirdn,email_dn,startdate,enddate,days,extensions, + j=certify_spkac(&x,spkac_file,pkey,x509,dgst,sigopts, + attribs,db, serial,subj,chtype,multirdn, + email_dn,startdate,enddate,days,extensions, conf,verbose,certopt,nameopt,default_op,ext_copy); if (j < 0) goto err; if (j > 0) @@ -1194,7 +1209,8 @@ bad: if (ss_cert_file != NULL) { total++; - j=certify_cert(&x,ss_cert_file,pkey,x509,dgst,attribs, + j=certify_cert(&x,ss_cert_file,pkey,x509,dgst,sigopts, + attribs, db,serial,subj,chtype,multirdn,email_dn,startdate,enddate,days,batch, extensions,conf,verbose, certopt, nameopt, default_op, ext_copy, e); @@ -1214,7 +1230,7 @@ bad: if (infile != NULL) { total++; - j=certify(&x,infile,pkey,x509p,dgst,attribs,db, + j=certify(&x,infile,pkey,x509p,dgst,sigopts, attribs,db, serial,subj,chtype,multirdn,email_dn,startdate,enddate,days,batch, extensions,conf,verbose, certopt, nameopt, default_op, ext_copy, selfsign); @@ -1234,7 +1250,7 @@ bad: for (i=0; iid; + unsigned long id = SSL_CIPHER_get_id(c); int id0 = (int)(id >> 24); int id1 = (int)((id >> 16) & 0xffL); int id2 = (int)((id >> 8) & 0xffL); diff --git a/app/openssl/apps/client.pem b/app/openssl/apps/client.pem index 307910e5..e7a47a73 100644 --- a/app/openssl/apps/client.pem +++ b/app/openssl/apps/client.pem @@ -1,24 +1,52 @@ -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Client test cert (512 bit) +subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Client Cert +issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA -----BEGIN CERTIFICATE----- -MIIB6TCCAVICAQIwDQYJKoZIhvcNAQEEBQAwWzELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYD -VQQDExJUZXN0IENBICgxMDI0IGJpdCkwHhcNOTcwNjA5MTM1NzU2WhcNOTgwNjA5 -MTM1NzU2WjBjMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDEaMBgG -A1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxIzAhBgNVBAMTGkNsaWVudCB0ZXN0IGNl -cnQgKDUxMiBiaXQpMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALtv55QyzG6i2Plw -Z1pah7++Gv8L5j6Hnyr/uTZE1NLG0ABDDexmq/R4KedLjFEIYjocDui+IXs62NNt -XrT8odkCAwEAATANBgkqhkiG9w0BAQQFAAOBgQBwtMmI7oGUG8nKmftQssATViH5 -NRRtoEw07DxJp/LfatHdrhqQB73eGdL5WILZJXk46Xz2e9WMSUjVCSYhdKxtflU3 -UR2Ajv1Oo0sTNdfz0wDqJNirLNtzyhhsaq8qMTrLwXrCP31VxBiigFSQSUFnZyTE -9TKwhS4GlwbtCfxSKQ== +MIID5zCCAs+gAwIBAgIJALnu1NlVpZ6yMA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT +VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt +ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZDELMAkG +A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU +RVNUSU5HIFBVUlBPU0VTIE9OTFkxGTAXBgNVBAMMEFRlc3QgQ2xpZW50IENlcnQw +ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC0ranbHRLcLVqN+0BzcZpY ++yOLqxzDWT1LD9eW1stC4NzXX9/DCtSIVyN7YIHdGLrIPr64IDdXXaMRzgZ2rOKs +lmHCAiFpO/ja99gGCJRxH0xwQatqAULfJVHeUhs7OEGOZc2nWifjqKvGfNTilP7D +nwi69ipQFq9oS19FmhwVHk2wg7KZGHI1qDyG04UrfCZMRitvS9+UVhPpIPjuiBi2 +x3/FZIpL5gXJvvFK6xHY63oq2asyzBATntBgnP4qJFWWcvRx24wF1PnZabxuVoL2 +bPnQ/KvONDrw3IdqkKhYNTul7jEcu3OlcZIMw+7DiaKJLAzKb/bBF5gm/pwW6As9 +AgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJYIZI +AYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQW +BBSZHKyLoTh7Mb409Zn/mK1ceSDAjDAfBgNVHSMEGDAWgBQ2w2yI55X+sL3szj49 +hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEAD0mL7PtPYgCEuDyOQSbLpeND5hVS +curxQdGnrJ6Acrhodb7E9ccATokeb0PLx6HBLQUicxhTZIQ9FbO43YkQcOU6C3BB +IlwskqmtN6+VmrQzNolHCDzvxNZs9lYL2VbGPGqVRyjZeHpoAlf9cQr8PgDb4d4b +vUx2KAhHQvV2nkmYvKyXcgnRuHggumF87mkxidriGAEFwH4qfOqetUg64WyxP7P2 +QLipm04SyQa7ONtIApfVXgHcE42Py4/f4arzCzMjKe3VyhGkS7nsT55X/fWgTaRm +CQPkO+H94P958WTvQDt77bQ+D3IvYaVvfil8n6HJMOJfFT0LJuSUbpSXJg== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- -MIIBOwIBAAJBALtv55QyzG6i2PlwZ1pah7++Gv8L5j6Hnyr/uTZE1NLG0ABDDexm -q/R4KedLjFEIYjocDui+IXs62NNtXrT8odkCAwEAAQJAbwXq0vJ/+uyEvsNgxLko -/V86mGXQ/KrSkeKlL0r4ENxjcyeMAGoKu6J9yMY7+X9+Zm4nxShNfTsf/+Freoe1 -HQIhAPOSm5Q1YI+KIsII2GeVJx1U69+wnd71OasIPakS1L1XAiEAxQAW+J3/JWE0 -ftEYakbhUOKL8tD1OaFZS71/5GdG7E8CIQCefUMmySSvwd6kC0VlATSWbW+d+jp/ -nWmM1KvqnAo5uQIhALqEADu5U1Wvt8UN8UDGBRPQulHWNycuNV45d3nnskWPAiAw -ueTyr6WsZ5+SD8g/Hy3xuvF3nPmJRH+rwvVihlcFOg== +MIIEpQIBAAKCAQEAtK2p2x0S3C1ajftAc3GaWPsji6scw1k9Sw/XltbLQuDc11/f +wwrUiFcje2CB3Ri6yD6+uCA3V12jEc4GdqzirJZhwgIhaTv42vfYBgiUcR9McEGr +agFC3yVR3lIbOzhBjmXNp1on46irxnzU4pT+w58IuvYqUBavaEtfRZocFR5NsIOy +mRhyNag8htOFK3wmTEYrb0vflFYT6SD47ogYtsd/xWSKS+YFyb7xSusR2Ot6Ktmr +MswQE57QYJz+KiRVlnL0cduMBdT52Wm8blaC9mz50PyrzjQ68NyHapCoWDU7pe4x +HLtzpXGSDMPuw4miiSwMym/2wReYJv6cFugLPQIDAQABAoIBAAZOyc9MhIwLSU4L +p4RgQvM4UVVe8/Id+3XTZ8NsXExJbWxXfIhiqGjaIfL8u4vsgRjcl+v1s/jo2/iT +KMab4o4D8gXD7UavQVDjtjb/ta79WL3SjRl2Uc9YjjMkyq6WmDNQeo2NKDdafCTB +1uzSJtLNipB8Z53ELPuHJhxX9QMHrMnuha49riQgXZ7buP9iQrHJFhImBjSzbxJx +L+TI6rkyLSf9Wi0Pd3L27Ob3QWNfNRYNSeTE+08eSRChkur5W0RuXAcuAICdQlCl +LBvWO/LmmvbzCqiDcgy/TliSb6CGGwgiNG7LJZmlkYNj8laGwalNlYZs3UrVv6NO +Br2loAECgYEA2kvCvPGj0Dg/6g7WhXDvAkEbcaL1tSeCxBbNH+6HS2UWMWvyTtCn +/bbD519QIdkvayy1QjEf32GV/UjUVmlULMLBcDy0DGjtL3+XpIhLKWDNxN1v1/ai +1oz23ZJCOgnk6K4qtFtlRS1XtynjA+rBetvYvLP9SKeFrnpzCgaA2r0CgYEA0+KX +1ACXDTNH5ySX3kMjSS9xdINf+OOw4CvPHFwbtc9aqk2HePlEsBTz5I/W3rKwXva3 +NqZ/bRqVVeZB/hHKFywgdUQk2Uc5z/S7Lw70/w1HubNTXGU06Ngb6zOFAo/o/TwZ +zTP1BMIKSOB6PAZPS3l+aLO4FRIRotfFhgRHOoECgYEAmiZbqt8cJaJDB/5YYDzC +mp3tSk6gIb936Q6M5VqkMYp9pIKsxhk0N8aDCnTU+kIK6SzWBpr3/d9Ecmqmfyq7 +5SvWO3KyVf0WWK9KH0abhOm2BKm2HBQvI0DB5u8sUx2/hsvOnjPYDISbZ11t0MtK +u35Zy89yMYcSsIYJjG/ROCUCgYEAgI2P9G5PNxEP5OtMwOsW84Y3Xat/hPAQFlI+ +HES+AzbFGWJkeT8zL2nm95tVkFP1sggZ7Kxjz3w7cpx7GX0NkbWSE9O+T51pNASV +tN1sQ3p5M+/a+cnlqgfEGJVvc7iAcXQPa3LEi5h2yPR49QYXAgG6cifn3dDSpmwn +SUI7PQECgYEApGCIIpSRPLAEHTGmP87RBL1smurhwmy2s/pghkvUkWehtxg0sGHh +kuaqDWcskogv+QC0sVdytiLSz8G0DwcEcsHK1Fkyb8A+ayiw6jWJDo2m9+IF4Fww +1Te6jFPYDESnbhq7+TLGgHGhtwcu5cnb4vSuYXGXKupZGzoLOBbv1Zw= -----END RSA PRIVATE KEY----- diff --git a/app/openssl/apps/cms.c b/app/openssl/apps/cms.c index d29a8849..5f77f8fb 100644 --- a/app/openssl/apps/cms.c +++ b/app/openssl/apps/cms.c @@ -136,6 +136,7 @@ int MAIN(int argc, char **argv) char *engine=NULL; #endif unsigned char *secret_key = NULL, *secret_keyid = NULL; + unsigned char *pwri_pass = NULL, *pwri_tmp = NULL; size_t secret_keylen = 0, secret_keyidlen = 0; ASN1_OBJECT *econtent_type = NULL; @@ -232,6 +233,8 @@ int MAIN(int argc, char **argv) else if (!strcmp(*args,"-camellia256")) cipher = EVP_camellia_256_cbc(); #endif + else if (!strcmp (*args, "-debug_decrypt")) + flags |= CMS_DEBUG_DECRYPT; else if (!strcmp (*args, "-text")) flags |= CMS_TEXT; else if (!strcmp (*args, "-nointern")) @@ -326,6 +329,13 @@ int MAIN(int argc, char **argv) } secret_keyidlen = (size_t)ltmp; } + else if (!strcmp(*args,"-pwri_password")) + { + if (!args[1]) + goto argerr; + args++; + pwri_pass = (unsigned char *)*args; + } else if (!strcmp(*args,"-econtent_type")) { if (!args[1]) @@ -559,7 +569,7 @@ int MAIN(int argc, char **argv) else if (operation == SMIME_DECRYPT) { - if (!recipfile && !keyfile && !secret_key) + if (!recipfile && !keyfile && !secret_key && !pwri_pass) { BIO_printf(bio_err, "No recipient certificate or key specified\n"); badarg = 1; @@ -567,7 +577,7 @@ int MAIN(int argc, char **argv) } else if (operation == SMIME_ENCRYPT) { - if (!*args && !secret_key) + if (!*args && !secret_key && !pwri_pass) { BIO_printf(bio_err, "No recipient(s) certificate(s) specified\n"); badarg = 1; @@ -618,7 +628,7 @@ int MAIN(int argc, char **argv) BIO_printf (bio_err, "-certsout file certificate output file\n"); BIO_printf (bio_err, "-signer file signer certificate file\n"); BIO_printf (bio_err, "-recip file recipient certificate file for decryption\n"); - BIO_printf (bio_err, "-skeyid use subject key identifier\n"); + BIO_printf (bio_err, "-keyid use subject key identifier\n"); BIO_printf (bio_err, "-in file input file\n"); BIO_printf (bio_err, "-inform arg input format SMIME (default), PEM or DER\n"); BIO_printf (bio_err, "-inkey file input private key (if not signer or recipient)\n"); @@ -917,6 +927,17 @@ int MAIN(int argc, char **argv) secret_key = NULL; secret_keyid = NULL; } + if (pwri_pass) + { + pwri_tmp = (unsigned char *)BUF_strdup((char *)pwri_pass); + if (!pwri_tmp) + goto end; + if (!CMS_add0_recipient_password(cms, + -1, NID_undef, NID_undef, + pwri_tmp, -1, NULL)) + goto end; + pwri_tmp = NULL; + } if (!(flags & CMS_STREAM)) { if (!CMS_final(cms, in, NULL, flags)) @@ -1020,6 +1041,8 @@ int MAIN(int argc, char **argv) ret = 4; if (operation == SMIME_DECRYPT) { + if (flags & CMS_DEBUG_DECRYPT) + CMS_decrypt(cms, NULL, NULL, NULL, NULL, flags); if (secret_key) { @@ -1043,6 +1066,16 @@ int MAIN(int argc, char **argv) } } + if (pwri_pass) + { + if (!CMS_decrypt_set1_password(cms, pwri_pass, -1)) + { + BIO_puts(bio_err, + "Error decrypting CMS using password\n"); + goto end; + } + } + if (!CMS_decrypt(cms, NULL, NULL, indata, out, flags)) { BIO_printf(bio_err, "Error decrypting CMS structure\n"); @@ -1167,6 +1200,8 @@ end: OPENSSL_free(secret_key); if (secret_keyid) OPENSSL_free(secret_keyid); + if (pwri_tmp) + OPENSSL_free(pwri_tmp); if (econtent_type) ASN1_OBJECT_free(econtent_type); if (rr) diff --git a/app/openssl/apps/crl.c b/app/openssl/apps/crl.c index c395b2af..8797d300 100644 --- a/app/openssl/apps/crl.c +++ b/app/openssl/apps/crl.c @@ -81,6 +81,9 @@ static const char *crl_usage[]={ " -in arg - input file - default stdin\n", " -out arg - output file - default stdout\n", " -hash - print hash value\n", +#ifndef OPENSSL_NO_MD5 +" -hash_old - print old-style (MD5) hash value\n", +#endif " -fingerprint - print the crl fingerprint\n", " -issuer - print issuer DN\n", " -lastupdate - lastUpdate field\n", @@ -108,6 +111,9 @@ int MAIN(int argc, char **argv) int informat,outformat; char *infile=NULL,*outfile=NULL; int hash=0,issuer=0,lastupdate=0,nextupdate=0,noout=0,text=0; +#ifndef OPENSSL_NO_MD5 + int hash_old=0; +#endif int fingerprint = 0, crlnumber = 0; const char **pp; X509_STORE *store = NULL; @@ -192,6 +198,10 @@ int MAIN(int argc, char **argv) text = 1; else if (strcmp(*argv,"-hash") == 0) hash= ++num; +#ifndef OPENSSL_NO_MD5 + else if (strcmp(*argv,"-hash_old") == 0) + hash_old= ++num; +#endif else if (strcmp(*argv,"-nameopt") == 0) { if (--argc < 1) goto bad; @@ -304,6 +314,14 @@ bad: BIO_printf(bio_out,"%08lx\n", X509_NAME_hash(X509_CRL_get_issuer(x))); } +#ifndef OPENSSL_NO_MD5 + if (hash_old == i) + { + BIO_printf(bio_out,"%08lx\n", + X509_NAME_hash_old( + X509_CRL_get_issuer(x))); + } +#endif if (lastupdate == i) { BIO_printf(bio_out,"lastUpdate="); diff --git a/app/openssl/apps/dgst.c b/app/openssl/apps/dgst.c index 9bf38ce7..f4aec779 100644 --- a/app/openssl/apps/dgst.c +++ b/app/openssl/apps/dgst.c @@ -127,6 +127,7 @@ int MAIN(int argc, char **argv) #endif char *hmac_key=NULL; char *mac_name=NULL; + int non_fips_allow = 0; STACK_OF(OPENSSL_STRING) *sigopts = NULL, *macopts = NULL; apps_startup(); @@ -215,6 +216,10 @@ int MAIN(int argc, char **argv) out_bin = 1; else if (strcmp(*argv,"-d") == 0) debug=1; + else if (!strcmp(*argv,"-fips-fingerprint")) + hmac_key = "etaonrishdlcupfm"; + else if (strcmp(*argv,"-non-fips-allow") == 0) + non_fips_allow=1; else if (!strcmp(*argv,"-hmac")) { if (--argc < 1) @@ -395,6 +400,13 @@ int MAIN(int argc, char **argv) goto end; } + if (non_fips_allow) + { + EVP_MD_CTX *md_ctx; + BIO_get_md_ctx(bmd,&md_ctx); + EVP_MD_CTX_set_flags(md_ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + } + if (hmac_key) { sigkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, e, @@ -415,9 +427,9 @@ int MAIN(int argc, char **argv) goto end; } if (do_verify) - r = EVP_DigestVerifyInit(mctx, &pctx, md, e, sigkey); + r = EVP_DigestVerifyInit(mctx, &pctx, md, NULL, sigkey); else - r = EVP_DigestSignInit(mctx, &pctx, md, e, sigkey); + r = EVP_DigestSignInit(mctx, &pctx, md, NULL, sigkey); if (!r) { BIO_printf(bio_err, "Error setting context\n"); diff --git a/app/openssl/apps/dhparam.c b/app/openssl/apps/dhparam.c index b47097cb..1297d6fb 100644 --- a/app/openssl/apps/dhparam.c +++ b/app/openssl/apps/dhparam.c @@ -332,7 +332,6 @@ bad: BIO_printf(bio_err,"This is going to take a long time\n"); if(!dh || !DH_generate_parameters_ex(dh, num, g, &cb)) { - if(dh) DH_free(dh); ERR_print_errors(bio_err); goto end; } diff --git a/app/openssl/apps/dsaparam.c b/app/openssl/apps/dsaparam.c index fe72c1d3..683d5139 100644 --- a/app/openssl/apps/dsaparam.c +++ b/app/openssl/apps/dsaparam.c @@ -326,6 +326,7 @@ bad: goto end; } #endif + ERR_print_errors(bio_err); BIO_printf(bio_err,"Error, DSA key generation failed\n"); goto end; } @@ -429,13 +430,19 @@ bad: assert(need_rand); if ((dsakey=DSAparams_dup(dsa)) == NULL) goto end; - if (!DSA_generate_key(dsakey)) goto end; + if (!DSA_generate_key(dsakey)) + { + ERR_print_errors(bio_err); + DSA_free(dsakey); + goto end; + } if (outformat == FORMAT_ASN1) i=i2d_DSAPrivateKey_bio(out,dsakey); else if (outformat == FORMAT_PEM) i=PEM_write_bio_DSAPrivateKey(out,dsakey,NULL,NULL,0,NULL,NULL); else { BIO_printf(bio_err,"bad output format specified for outfile\n"); + DSA_free(dsakey); goto end; } DSA_free(dsakey); diff --git a/app/openssl/apps/ecparam.c b/app/openssl/apps/ecparam.c index 465480be..976ebef1 100644 --- a/app/openssl/apps/ecparam.c +++ b/app/openssl/apps/ecparam.c @@ -105,7 +105,7 @@ * in the asn1 der encoding * possible values: named_curve (default) * explicit - * -no_seed - if 'explicit' parameters are choosen do not use the seed + * -no_seed - if 'explicit' parameters are chosen do not use the seed * -genkey - generate ec key * -rand file - files to use for random number input * -engine e - use engine e, possibly a hardware device @@ -286,7 +286,7 @@ bad: BIO_printf(bio_err, " " " explicit\n"); BIO_printf(bio_err, " -no_seed if 'explicit'" - " parameters are choosen do not" + " parameters are chosen do not" " use the seed\n"); BIO_printf(bio_err, " -genkey generate ec" " key\n"); diff --git a/app/openssl/apps/enc.c b/app/openssl/apps/enc.c index 076225c4..719acc32 100644 --- a/app/openssl/apps/enc.c +++ b/app/openssl/apps/enc.c @@ -129,6 +129,7 @@ int MAIN(int argc, char **argv) char *engine = NULL; #endif const EVP_MD *dgst=NULL; + int non_fips_allow = 0; apps_startup(); @@ -281,6 +282,8 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; md= *(++argv); } + else if (strcmp(*argv,"-non-fips-allow") == 0) + non_fips_allow = 1; else if ((argv[0][0] == '-') && ((c=EVP_get_cipherbyname(&(argv[0][1]))) != NULL)) { @@ -589,6 +592,11 @@ bad: */ BIO_get_cipher_ctx(benc, &ctx); + + if (non_fips_allow) + EVP_CIPHER_CTX_set_flags(ctx, + EVP_CIPH_FLAG_NON_FIPS_ALLOW); + if (!EVP_CipherInit_ex(ctx, cipher, NULL, NULL, NULL, enc)) { BIO_printf(bio_err, "Error setting cipher %s\n", diff --git a/app/openssl/apps/genrsa.c b/app/openssl/apps/genrsa.c index 37e93109..ece114c8 100644 --- a/app/openssl/apps/genrsa.c +++ b/app/openssl/apps/genrsa.c @@ -78,7 +78,7 @@ #include #include -#define DEFBITS 512 +#define DEFBITS 1024 #undef PROG #define PROG genrsa_main diff --git a/app/openssl/apps/md4.c b/app/openssl/apps/md4.c deleted file mode 100644 index 141415ad..00000000 --- a/app/openssl/apps/md4.c +++ /dev/null @@ -1,127 +0,0 @@ -/* crypto/md4/md4.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#define BUFSIZE 1024*16 - -void do_fp(FILE *f); -void pt(unsigned char *md); -#if !defined(_OSD_POSIX) && !defined(__DJGPP__) -int read(int, void *, unsigned int); -#endif - -int main(int argc, char **argv) - { - int i,err=0; - FILE *IN; - - if (argc == 1) - { - do_fp(stdin); - } - else - { - for (i=1; i use specified digest in the request"); + BIO_printf (bio_err, "- use specified digest in the request\n"); goto end; } diff --git a/app/openssl/apps/openssl.c b/app/openssl/apps/openssl.c index 10689573..a9fa1e3f 100644 --- a/app/openssl/apps/openssl.c +++ b/app/openssl/apps/openssl.c @@ -117,6 +117,7 @@ #include "apps.h" #include #include +#include #include #include #include @@ -129,6 +130,9 @@ #include "progs.h" #include "s_apps.h" #include +#ifdef OPENSSL_FIPS +#include +#endif /* The LHASH callbacks ("hash" & "cmp") have been replaced by functions with the * base prototypes (we cast each variable inside the function to the required @@ -310,6 +314,19 @@ int main(int Argc, char *ARGV[]) CRYPTO_set_locking_callback(lock_dbg_cb); } + if(getenv("OPENSSL_FIPS")) { +#ifdef OPENSSL_FIPS + if (!FIPS_mode_set(1)) { + ERR_load_crypto_strings(); + ERR_print_errors(BIO_new_fp(stderr,BIO_NOCLOSE)); + EXIT(1); + } +#else + fprintf(stderr, "FIPS mode not supported.\n"); + EXIT(1); +#endif + } + apps_startup(); /* Lets load up our environment a little */ diff --git a/app/openssl/apps/openssl.cnf b/app/openssl/apps/openssl.cnf index 9d2cd5bf..18760c6e 100644 --- a/app/openssl/apps/openssl.cnf +++ b/app/openssl/apps/openssl.cnf @@ -145,7 +145,7 @@ localityName = Locality Name (eg, city) organizationalUnitName = Organizational Unit Name (eg, section) #organizationalUnitName_default = -commonName = Common Name (eg, YOUR name) +commonName = Common Name (e.g. server FQDN or YOUR name) commonName_max = 64 emailAddress = Email Address diff --git a/app/openssl/apps/pkcs12.c b/app/openssl/apps/pkcs12.c index b54c6f84..4d62a7b8 100644 --- a/app/openssl/apps/pkcs12.c +++ b/app/openssl/apps/pkcs12.c @@ -112,7 +112,7 @@ int MAIN(int argc, char **argv) int maciter = PKCS12_DEFAULT_ITER; int twopass = 0; int keytype = 0; - int cert_pbe = NID_pbe_WithSHA1And40BitRC2_CBC; + int cert_pbe; int key_pbe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; int ret = 1; int macver = 1; @@ -130,6 +130,13 @@ int MAIN(int argc, char **argv) apps_startup(); +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + cert_pbe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; + else +#endif + cert_pbe = NID_pbe_WithSHA1And40BitRC2_CBC; + enc = EVP_des_ede3_cbc(); if (bio_err == NULL ) bio_err = BIO_new_fp (stderr, BIO_NOCLOSE); diff --git a/app/openssl/apps/progs.h b/app/openssl/apps/progs.h index 728bb6dd..dd2298b5 100644 --- a/app/openssl/apps/progs.h +++ b/app/openssl/apps/progs.h @@ -46,6 +46,7 @@ extern int engine_main(int argc,char *argv[]); extern int ocsp_main(int argc,char *argv[]); extern int prime_main(int argc,char *argv[]); extern int ts_main(int argc,char *argv[]); +extern int srp_main(int argc,char *argv[]); #define FUNC_TYPE_GENERAL 1 #define FUNC_TYPE_MD 2 @@ -149,6 +150,9 @@ FUNCTION functions[] = { #if 0 /* ANDROID */ {FUNC_TYPE_GENERAL,"ts",ts_main}, #endif +#ifndef OPENSSL_NO_SRP + {FUNC_TYPE_GENERAL,"srp",srp_main}, +#endif #ifndef OPENSSL_NO_MD2 {FUNC_TYPE_MD,"md2",dgst_main}, #endif diff --git a/app/openssl/apps/progs.pl b/app/openssl/apps/progs.pl index de6fdeab..39ca8f71 100644 --- a/app/openssl/apps/progs.pl +++ b/app/openssl/apps/progs.pl @@ -51,6 +51,8 @@ foreach (@ARGV) { print "#ifndef OPENSSL_NO_CMS\n${str}#endif\n"; } elsif ( ($_ =~ /^ocsp$/)) { print "#ifndef OPENSSL_NO_OCSP\n${str}#endif\n"; } + elsif ( ($_ =~ /^srp$/)) + { print "#ifndef OPENSSL_NO_SRP\n${str}#endif\n"; } else { print $str; } } diff --git a/app/openssl/apps/req.c b/app/openssl/apps/req.c index 820cd18f..5e034a85 100644 --- a/app/openssl/apps/req.c +++ b/app/openssl/apps/req.c @@ -165,7 +165,7 @@ int MAIN(int argc, char **argv) EVP_PKEY_CTX *genctx = NULL; const char *keyalg = NULL; char *keyalgstr = NULL; - STACK_OF(OPENSSL_STRING) *pkeyopts = NULL; + STACK_OF(OPENSSL_STRING) *pkeyopts = NULL, *sigopts = NULL; EVP_PKEY *pkey=NULL; int i=0,badops=0,newreq=0,verbose=0,pkey_type=-1; long newkey = -1; @@ -310,6 +310,15 @@ int MAIN(int argc, char **argv) if (!pkeyopts || !sk_OPENSSL_STRING_push(pkeyopts, *(++argv))) goto bad; } + else if (strcmp(*argv,"-sigopt") == 0) + { + if (--argc < 1) + goto bad; + if (!sigopts) + sigopts = sk_OPENSSL_STRING_new_null(); + if (!sigopts || !sk_OPENSSL_STRING_push(sigopts, *(++argv))) + goto bad; + } else if (strcmp(*argv,"-batch") == 0) batch=1; else if (strcmp(*argv,"-newhdr") == 0) @@ -635,6 +644,11 @@ bad: if (inrand) app_RAND_load_files(inrand); + if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey)) + { + newkey=DEFAULT_KEY_LENGTH; + } + if (keyalg) { genctx = set_keygen_ctx(bio_err, keyalg, &pkey_type, &newkey, @@ -643,12 +657,6 @@ bad: goto end; } - if (newkey <= 0) - { - if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey)) - newkey=DEFAULT_KEY_LENGTH; - } - if (newkey < MIN_KEY_LENGTH && (pkey_type == EVP_PKEY_RSA || pkey_type == EVP_PKEY_DSA)) { BIO_printf(bio_err,"private key length is too short,\n"); @@ -858,8 +866,9 @@ loop: extensions); goto end; } - - if (!(i=X509_sign(x509ss,pkey,digest))) + + i=do_X509_sign(bio_err, x509ss, pkey, digest, sigopts); + if (!i) { ERR_print_errors(bio_err); goto end; @@ -883,7 +892,8 @@ loop: req_exts); goto end; } - if (!(i=X509_REQ_sign(req,pkey,digest))) + i=do_X509_REQ_sign(bio_err, req, pkey, digest, sigopts); + if (!i) { ERR_print_errors(bio_err); goto end; @@ -1084,6 +1094,8 @@ end: EVP_PKEY_CTX_free(genctx); if (pkeyopts) sk_OPENSSL_STRING_free(pkeyopts); + if (sigopts) + sk_OPENSSL_STRING_free(sigopts); #ifndef OPENSSL_NO_ENGINE if (gen_eng) ENGINE_free(gen_eng); @@ -1636,6 +1648,8 @@ static EVP_PKEY_CTX *set_keygen_ctx(BIO *err, const char *gstr, int *pkey_type, keylen = atol(p + 1); *pkeylen = keylen; } + else + keylen = *pkeylen; } else if (p) paramfile = p + 1; @@ -1756,3 +1770,68 @@ static int genpkey_cb(EVP_PKEY_CTX *ctx) #endif return 1; } + +static int do_sign_init(BIO *err, EVP_MD_CTX *ctx, EVP_PKEY *pkey, + const EVP_MD *md, STACK_OF(OPENSSL_STRING) *sigopts) + { + EVP_PKEY_CTX *pkctx = NULL; + int i; + EVP_MD_CTX_init(ctx); + if (!EVP_DigestSignInit(ctx, &pkctx, md, NULL, pkey)) + return 0; + for (i = 0; i < sk_OPENSSL_STRING_num(sigopts); i++) + { + char *sigopt = sk_OPENSSL_STRING_value(sigopts, i); + if (pkey_ctrl_string(pkctx, sigopt) <= 0) + { + BIO_printf(err, "parameter error \"%s\"\n", sigopt); + ERR_print_errors(bio_err); + return 0; + } + } + return 1; + } + +int do_X509_sign(BIO *err, X509 *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts) + { + int rv; + EVP_MD_CTX mctx; + EVP_MD_CTX_init(&mctx); + rv = do_sign_init(err, &mctx, pkey, md, sigopts); + if (rv > 0) + rv = X509_sign_ctx(x, &mctx); + EVP_MD_CTX_cleanup(&mctx); + return rv > 0 ? 1 : 0; + } + + +int do_X509_REQ_sign(BIO *err, X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts) + { + int rv; + EVP_MD_CTX mctx; + EVP_MD_CTX_init(&mctx); + rv = do_sign_init(err, &mctx, pkey, md, sigopts); + if (rv > 0) + rv = X509_REQ_sign_ctx(x, &mctx); + EVP_MD_CTX_cleanup(&mctx); + return rv > 0 ? 1 : 0; + } + + + +int do_X509_CRL_sign(BIO *err, X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md, + STACK_OF(OPENSSL_STRING) *sigopts) + { + int rv; + EVP_MD_CTX mctx; + EVP_MD_CTX_init(&mctx); + rv = do_sign_init(err, &mctx, pkey, md, sigopts); + if (rv > 0) + rv = X509_CRL_sign_ctx(x, &mctx); + EVP_MD_CTX_cleanup(&mctx); + return rv > 0 ? 1 : 0; + } + + diff --git a/app/openssl/apps/s_cb.c b/app/openssl/apps/s_cb.c index c4f55122..84c3b447 100644 --- a/app/openssl/apps/s_cb.c +++ b/app/openssl/apps/s_cb.c @@ -237,8 +237,8 @@ int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file) /* If we are using DSA, we can copy the parameters from * the private key */ - - + + /* Now we know that a key and cert have been set against * the SSL context */ if (!SSL_CTX_check_private_key(ctx)) @@ -357,6 +357,12 @@ void MS_CALLBACK msg_cb(int write_p, int version, int content_type, const void * case TLS1_VERSION: str_version = "TLS 1.0 "; break; + case TLS1_1_VERSION: + str_version = "TLS 1.1 "; + break; + case TLS1_2_VERSION: + str_version = "TLS 1.2 "; + break; case DTLS1_VERSION: str_version = "DTLS 1.0 "; break; @@ -430,6 +436,8 @@ void MS_CALLBACK msg_cb(int write_p, int version, int content_type, const void * if (version == SSL3_VERSION || version == TLS1_VERSION || + version == TLS1_1_VERSION || + version == TLS1_2_VERSION || version == DTLS1_VERSION || version == DTLS1_BAD_VER) { @@ -549,6 +557,9 @@ void MS_CALLBACK msg_cb(int write_p, int version, int content_type, const void * case 114: str_details2 = " bad_certificate_hash_value"; break; + case 115: + str_details2 = " unknown_psk_identity"; + break; } } } @@ -597,6 +608,26 @@ void MS_CALLBACK msg_cb(int write_p, int version, int content_type, const void * } } } + +#ifndef OPENSSL_NO_HEARTBEATS + if (content_type == 24) /* Heartbeat */ + { + str_details1 = ", Heartbeat"; + + if (len > 0) + { + switch (((const unsigned char*)buf)[0]) + { + case 1: + str_details1 = ", HeartbeatRequest"; + break; + case 2: + str_details1 = ", HeartbeatResponse"; + break; + } + } + } +#endif } BIO_printf(bio, "%s %s%s [length %04lx]%s%s\n", str_write_p, str_version, str_content_type, (unsigned long)len, str_details1, str_details2); @@ -657,6 +688,22 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type, extname = "status request"; break; + case TLSEXT_TYPE_user_mapping: + extname = "user mapping"; + break; + + case TLSEXT_TYPE_client_authz: + extname = "client authz"; + break; + + case TLSEXT_TYPE_server_authz: + extname = "server authz"; + break; + + case TLSEXT_TYPE_cert_type: + extname = "cert type"; + break; + case TLSEXT_TYPE_elliptic_curves: extname = "elliptic curves"; break; @@ -665,12 +712,28 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type, extname = "EC point formats"; break; + case TLSEXT_TYPE_srp: + extname = "SRP"; + break; + + case TLSEXT_TYPE_signature_algorithms: + extname = "signature algorithms"; + break; + + case TLSEXT_TYPE_use_srtp: + extname = "use SRTP"; + break; + + case TLSEXT_TYPE_heartbeat: + extname = "heartbeat"; + break; + case TLSEXT_TYPE_session_ticket: - extname = "server ticket"; + extname = "session ticket"; break; - case TLSEXT_TYPE_renegotiate: - extname = "renegotiate"; + case TLSEXT_TYPE_renegotiate: + extname = "renegotiation info"; break; #ifdef TLSEXT_TYPE_opaque_prf_input @@ -678,6 +741,11 @@ void MS_CALLBACK tlsext_cb(SSL *s, int client_server, int type, extname = "opaque PRF input"; break; #endif +#ifdef TLSEXT_TYPE_next_proto_neg + case TLSEXT_TYPE_next_proto_neg: + extname = "next protocol"; + break; +#endif default: extname = "unknown"; diff --git a/app/openssl/apps/s_client.c b/app/openssl/apps/s_client.c index b951513d..0c705803 100644 --- a/app/openssl/apps/s_client.c +++ b/app/openssl/apps/s_client.c @@ -163,6 +163,9 @@ typedef unsigned int u_int; #include #include #include +#ifndef OPENSSL_NO_SRP +#include +#endif #include "s_apps.h" #include "timeouts.h" @@ -203,6 +206,9 @@ static int c_status_req=0; static int c_msg=0; static int c_showcerts=0; +static char *keymatexportlabel=NULL; +static int keymatexportlen=20; + static void sc_usage(void); static void print_stuff(BIO *berr,SSL *con,int full); #ifndef OPENSSL_NO_TLSEXT @@ -315,13 +321,22 @@ static void sc_usage(void) # ifndef OPENSSL_NO_JPAKE BIO_printf(bio_err," -jpake arg - JPAKE secret to use\n"); # endif +#endif +#ifndef OPENSSL_NO_SRP + BIO_printf(bio_err," -srpuser user - SRP authentification for 'user'\n"); + BIO_printf(bio_err," -srppass arg - password for 'user'\n"); + BIO_printf(bio_err," -srp_lateuser - SRP username into second ClientHello message\n"); + BIO_printf(bio_err," -srp_moregroups - Tolerate other than the known g N values.\n"); + BIO_printf(bio_err," -srp_strength int - minimal mength in bits for N (default %d).\n",SRP_MINIMAL_N); #endif BIO_printf(bio_err," -ssl2 - just use SSLv2\n"); BIO_printf(bio_err," -ssl3 - just use SSLv3\n"); + BIO_printf(bio_err," -tls1_2 - just use TLSv1.2\n"); + BIO_printf(bio_err," -tls1_1 - just use TLSv1.1\n"); BIO_printf(bio_err," -tls1 - just use TLSv1\n"); BIO_printf(bio_err," -dtls1 - just use DTLSv1\n"); BIO_printf(bio_err," -mtu - set the link layer MTU\n"); - BIO_printf(bio_err," -no_tls1/-no_ssl3/-no_ssl2 - turn off that protocol\n"); + BIO_printf(bio_err," -no_tls1_2/-no_tls1_1/-no_tls1/-no_ssl3/-no_ssl2 - turn off that protocol\n"); BIO_printf(bio_err," -bugs - Switch on all SSL implementation bug workarounds\n"); BIO_printf(bio_err," -serverpref - Use server's cipher preferences (only SSLv2)\n"); BIO_printf(bio_err," -cipher - preferred cipher to use, use the 'openssl ciphers'\n"); @@ -344,10 +359,17 @@ static void sc_usage(void) BIO_printf(bio_err," -no_ticket - disable use of RFC4507bis session tickets\n"); # ifndef OPENSSL_NO_NEXTPROTONEG BIO_printf(bio_err," -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n"); + BIO_printf(bio_err," -alpn arg - enable ALPN extension, considering named protocols supported (comma-separated list)\n"); # endif - BIO_printf(bio_err," -cutthrough - enable 1-RTT full-handshake for strong ciphers\n"); #endif + BIO_printf(bio_err," -cutthrough - enable 1-RTT full-handshake for strong ciphers\n"); + BIO_printf(bio_err," -no_record_splitting - disable 1/n-1 record splitting in CBC mode\n"); BIO_printf(bio_err," -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n"); +#ifndef OPENSSL_NO_SRTP + BIO_printf(bio_err," -use_srtp profiles - Offer SRTP key management with a colon-separated profile list\n"); +#endif + BIO_printf(bio_err," -keymatexport label - Export keying material using label\n"); + BIO_printf(bio_err," -keymatexportlen len - Export len bytes of keying material (default 20)\n"); } #ifndef OPENSSL_NO_TLSEXT @@ -371,6 +393,124 @@ static int MS_CALLBACK ssl_servername_cb(SSL *s, int *ad, void *arg) return SSL_TLSEXT_ERR_OK; } +#ifndef OPENSSL_NO_SRP + +/* This is a context that we pass to all callbacks */ +typedef struct srp_arg_st + { + char *srppassin; + char *srplogin; + int msg; /* copy from c_msg */ + int debug; /* copy from c_debug */ + int amp; /* allow more groups */ + int strength /* minimal size for N */ ; + } SRP_ARG; + +#define SRP_NUMBER_ITERATIONS_FOR_PRIME 64 + +static int srp_Verify_N_and_g(BIGNUM *N, BIGNUM *g) + { + BN_CTX *bn_ctx = BN_CTX_new(); + BIGNUM *p = BN_new(); + BIGNUM *r = BN_new(); + int ret = + g != NULL && N != NULL && bn_ctx != NULL && BN_is_odd(N) && + BN_is_prime_ex(N, SRP_NUMBER_ITERATIONS_FOR_PRIME, bn_ctx, NULL) && + p != NULL && BN_rshift1(p, N) && + + /* p = (N-1)/2 */ + BN_is_prime_ex(p, SRP_NUMBER_ITERATIONS_FOR_PRIME, bn_ctx, NULL) && + r != NULL && + + /* verify g^((N-1)/2) == -1 (mod N) */ + BN_mod_exp(r, g, p, N, bn_ctx) && + BN_add_word(r, 1) && + BN_cmp(r, N) == 0; + + if(r) + BN_free(r); + if(p) + BN_free(p); + if(bn_ctx) + BN_CTX_free(bn_ctx); + return ret; + } + +/* This callback is used here for two purposes: + - extended debugging + - making some primality tests for unknown groups + The callback is only called for a non default group. + + An application does not need the call back at all if + only the stanard groups are used. In real life situations, + client and server already share well known groups, + thus there is no need to verify them. + Furthermore, in case that a server actually proposes a group that + is not one of those defined in RFC 5054, it is more appropriate + to add the group to a static list and then compare since + primality tests are rather cpu consuming. +*/ + +static int MS_CALLBACK ssl_srp_verify_param_cb(SSL *s, void *arg) + { + SRP_ARG *srp_arg = (SRP_ARG *)arg; + BIGNUM *N = NULL, *g = NULL; + if (!(N = SSL_get_srp_N(s)) || !(g = SSL_get_srp_g(s))) + return 0; + if (srp_arg->debug || srp_arg->msg || srp_arg->amp == 1) + { + BIO_printf(bio_err, "SRP parameters:\n"); + BIO_printf(bio_err,"\tN="); BN_print(bio_err,N); + BIO_printf(bio_err,"\n\tg="); BN_print(bio_err,g); + BIO_printf(bio_err,"\n"); + } + + if (SRP_check_known_gN_param(g,N)) + return 1; + + if (srp_arg->amp == 1) + { + if (srp_arg->debug) + BIO_printf(bio_err, "SRP param N and g are not known params, going to check deeper.\n"); + +/* The srp_moregroups is a real debugging feature. + Implementors should rather add the value to the known ones. + The minimal size has already been tested. +*/ + if (BN_num_bits(g) <= BN_BITS && srp_Verify_N_and_g(N,g)) + return 1; + } + BIO_printf(bio_err, "SRP param N and g rejected.\n"); + return 0; + } + +#define PWD_STRLEN 1024 + +static char * MS_CALLBACK ssl_give_srp_client_pwd_cb(SSL *s, void *arg) + { + SRP_ARG *srp_arg = (SRP_ARG *)arg; + char *pass = (char *)OPENSSL_malloc(PWD_STRLEN+1); + PW_CB_DATA cb_tmp; + int l; + + cb_tmp.password = (char *)srp_arg->srppassin; + cb_tmp.prompt_info = "SRP user"; + if ((l = password_callback(pass, PWD_STRLEN, 0, &cb_tmp))<0) + { + BIO_printf (bio_err, "Can't read Password\n"); + OPENSSL_free(pass); + return NULL; + } + *(pass+l)= '\0'; + + return pass; + } + +#endif +#ifndef OPENSSL_NO_SRTP + char *srtp_profiles = NULL; +#endif + # ifndef OPENSSL_NO_NEXTPROTONEG /* This the context that we pass to next_proto_cb */ typedef struct tlsextnextprotoctx_st { @@ -422,6 +562,9 @@ int MAIN(int argc, char **argv) { unsigned int off=0, clr=0; SSL *con=NULL; +#ifndef OPENSSL_NO_KRB5 + KSSL_CTX *kctx; +#endif int s,k,width,state=0; char *cbuf=NULL,*sbuf=NULL,*mbuf=NULL; int cbuf_len,cbuf_off; @@ -437,7 +580,7 @@ int MAIN(int argc, char **argv) EVP_PKEY *key = NULL; char *CApath=NULL,*CAfile=NULL,*cipher=NULL; int reconnect=0,badop=0,verify=SSL_VERIFY_NONE,bugs=0; - int cutthrough=0; + int cutthrough=0, no_record_splitting=0; int crlf=0; int write_tty,read_tty,write_ssl,read_ssl,tty_on,ssl_pending; SSL_CTX *ctx=NULL; @@ -452,6 +595,7 @@ int MAIN(int argc, char **argv) char *inrand=NULL; int mbuf_len=0; struct timeval timeout, *timeoutp; + int ssl_mode; #ifndef OPENSSL_NO_ENGINE char *engine_id=NULL; char *ssl_client_engine_id=NULL; @@ -470,6 +614,7 @@ int MAIN(int argc, char **argv) {NULL,0}; # ifndef OPENSSL_NO_NEXTPROTONEG const char *next_proto_neg_in = NULL; + const char *alpn_in = NULL; # endif #endif char *sess_in = NULL; @@ -481,14 +626,13 @@ int MAIN(int argc, char **argv) #ifndef OPENSSL_NO_JPAKE char *jpake_secret = NULL; #endif +#ifndef OPENSSL_NO_SRP + char * srppass = NULL; + int srp_lateuser = 0; + SRP_ARG srp_arg = {NULL,NULL,0,0,0,1024}; +#endif -#if !defined(OPENSSL_NO_SSL2) && !defined(OPENSSL_NO_SSL3) meth=SSLv23_client_method(); -#elif !defined(OPENSSL_NO_SSL3) - meth=SSLv3_client_method(); -#elif !defined(OPENSSL_NO_SSL2) - meth=SSLv2_client_method(); -#endif apps_startup(); c_Pause=0; @@ -623,13 +767,44 @@ int MAIN(int argc, char **argv) psk_key=*(++argv); for (j = 0; j < strlen(psk_key); j++) { - if (isxdigit((int)psk_key[j])) + if (isxdigit((unsigned char)psk_key[j])) continue; BIO_printf(bio_err,"Not a hex number '%s'\n",*argv); goto bad; } } #endif +#ifndef OPENSSL_NO_SRP + else if (strcmp(*argv,"-srpuser") == 0) + { + if (--argc < 1) goto bad; + srp_arg.srplogin= *(++argv); + meth=TLSv1_client_method(); + } + else if (strcmp(*argv,"-srppass") == 0) + { + if (--argc < 1) goto bad; + srppass= *(++argv); + meth=TLSv1_client_method(); + } + else if (strcmp(*argv,"-srp_strength") == 0) + { + if (--argc < 1) goto bad; + srp_arg.strength=atoi(*(++argv)); + BIO_printf(bio_err,"SRP minimal length for N is %d\n",srp_arg.strength); + meth=TLSv1_client_method(); + } + else if (strcmp(*argv,"-srp_lateuser") == 0) + { + srp_lateuser= 1; + meth=TLSv1_client_method(); + } + else if (strcmp(*argv,"-srp_moregroups") == 0) + { + srp_arg.amp=1; + meth=TLSv1_client_method(); + } +#endif #ifndef OPENSSL_NO_SSL2 else if (strcmp(*argv,"-ssl2") == 0) meth=SSLv2_client_method(); @@ -639,6 +814,10 @@ int MAIN(int argc, char **argv) meth=SSLv3_client_method(); #endif #ifndef OPENSSL_NO_TLS1 + else if (strcmp(*argv,"-tls1_2") == 0) + meth=TLSv1_2_client_method(); + else if (strcmp(*argv,"-tls1_1") == 0) + meth=TLSv1_1_client_method(); else if (strcmp(*argv,"-tls1") == 0) meth=TLSv1_client_method(); #endif @@ -687,6 +866,10 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; CAfile= *(++argv); } + else if (strcmp(*argv,"-no_tls1_2") == 0) + off|=SSL_OP_NO_TLSv1_2; + else if (strcmp(*argv,"-no_tls1_1") == 0) + off|=SSL_OP_NO_TLSv1_1; else if (strcmp(*argv,"-no_tls1") == 0) off|=SSL_OP_NO_TLSv1; else if (strcmp(*argv,"-no_ssl3") == 0) @@ -704,10 +887,17 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; next_proto_neg_in = *(++argv); } + else if (strcmp(*argv,"-alpn") == 0) + { + if (--argc < 1) goto bad; + alpn_in = *(++argv); + } # endif #endif else if (strcmp(*argv,"-cutthrough") == 0) cutthrough=1; + else if (strcmp(*argv,"-no_record_splitting") == 0) + no_record_splitting=1; else if (strcmp(*argv,"-serverpref") == 0) off|=SSL_OP_CIPHER_SERVER_PREFERENCE; else if (strcmp(*argv,"-legacy_renegotiation") == 0) @@ -774,7 +964,25 @@ int MAIN(int argc, char **argv) jpake_secret = *++argv; } #endif - else +#ifndef OPENSSL_NO_SRTP + else if (strcmp(*argv,"-use_srtp") == 0) + { + if (--argc < 1) goto bad; + srtp_profiles = *(++argv); + } +#endif + else if (strcmp(*argv,"-keymatexport") == 0) + { + if (--argc < 1) goto bad; + keymatexportlabel= *(++argv); + } + else if (strcmp(*argv,"-keymatexportlen") == 0) + { + if (--argc < 1) goto bad; + keymatexportlen=atoi(*(++argv)); + if (keymatexportlen == 0) goto bad; + } + else { BIO_printf(bio_err,"unknown option %s\n",*argv); badop=1; @@ -800,14 +1008,13 @@ bad: goto end; } psk_identity = "JPAKE"; + if (cipher) + { + BIO_printf(bio_err, "JPAKE sets cipher to PSK\n"); + goto end; + } + cipher = "PSK"; } - - if (cipher) - { - BIO_printf(bio_err, "JPAKE sets cipher to PSK\n"); - goto end; - } - cipher = "PSK"; #endif OpenSSL_add_ssl_algorithms(); @@ -901,6 +1108,14 @@ bad: } } +#ifndef OPENSSL_NO_SRP + if(!app_passwd(bio_err, srppass, NULL, &srp_arg.srppassin, NULL)) + { + BIO_printf(bio_err, "Error getting password\n"); + goto end; + } +#endif + ctx=SSL_CTX_new(meth); if (ctx == NULL) { @@ -936,6 +1151,10 @@ bad: BIO_printf(bio_c_out, "PSK key given or JPAKE in use, setting client callback\n"); SSL_CTX_set_psk_client_callback(ctx, psk_client_cb); } +#endif +#ifndef OPENSSL_NO_SRTP + if (srtp_profiles != NULL) + SSL_CTX_set_tlsext_use_srtp(ctx, srtp_profiles); #endif if (bugs) SSL_CTX_set_options(ctx,SSL_OP_ALL|off); @@ -949,19 +1168,35 @@ bad: */ if (socket_type == SOCK_DGRAM) SSL_CTX_set_read_ahead(ctx, 1); - /* Enable handshake cutthrough for client connections using - * strong ciphers. */ +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) + if (next_proto.data) + SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto); +# endif + if (alpn_in) + { + unsigned short alpn_len; + unsigned char *alpn = next_protos_parse(&alpn_len, alpn_in); + + if (alpn == NULL) + { + BIO_printf(bio_err, "Error parsing -alpn argument\n"); + goto end; + } + SSL_CTX_set_alpn_protos(ctx, alpn, alpn_len); + } +#endif + + ssl_mode = SSL_CTX_get_mode(ctx); + if (!no_record_splitting) + ssl_mode |= SSL_MODE_CBC_RECORD_SPLITTING; if (cutthrough) { - int ssl_mode = SSL_CTX_get_mode(ctx); + /* Enable handshake cutthrough for client connections using + * strong ciphers. */ ssl_mode |= SSL_MODE_HANDSHAKE_CUTTHROUGH; - SSL_CTX_set_mode(ctx, ssl_mode); } - -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) - if (next_proto.data) - SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto); -#endif + SSL_CTX_set_mode(ctx, ssl_mode); if (state) SSL_CTX_set_info_callback(ctx,apps_ssl_info_callback); if (cipher != NULL) @@ -994,6 +1229,24 @@ bad: SSL_CTX_set_tlsext_servername_callback(ctx, ssl_servername_cb); SSL_CTX_set_tlsext_servername_arg(ctx, &tlsextcbp); } +#ifndef OPENSSL_NO_SRP + if (srp_arg.srplogin) + { + if (!srp_lateuser && !SSL_CTX_set_srp_username(ctx, srp_arg.srplogin)) + { + BIO_printf(bio_err,"Unable to set SRP username\n"); + goto end; + } + srp_arg.msg = c_msg; + srp_arg.debug = c_debug ; + SSL_CTX_set_srp_cb_arg(ctx,&srp_arg); + SSL_CTX_set_srp_client_pwd_callback(ctx, ssl_give_srp_client_pwd_cb); + SSL_CTX_set_srp_strength(ctx, srp_arg.strength); + if (c_msg || c_debug || srp_arg.amp == 0) + SSL_CTX_set_srp_verify_param_callback(ctx, ssl_srp_verify_param_cb); + } + +#endif #endif con=SSL_new(ctx); @@ -1032,9 +1285,10 @@ bad: } #endif #ifndef OPENSSL_NO_KRB5 - if (con && (con->kssl_ctx = kssl_ctx_new()) != NULL) + if (con && (kctx = kssl_ctx_new()) != NULL) { - kssl_ctx_setstring(con->kssl_ctx, KSSL_SERVER, host); + SSL_set0_kssl_ctx(con, kctx); + kssl_ctx_setstring(kctx, KSSL_SERVER, host); } #endif /* OPENSSL_NO_KRB5 */ /* SSL_set_cipher_list(con,"RC4-MD5"); */ @@ -1066,7 +1320,7 @@ re_start: } } #endif - if (c_Pause & 0x01) con->debug=1; + if (c_Pause & 0x01) SSL_set_debug(con, 1); if ( SSL_version(con) == DTLS1_VERSION) { @@ -1115,7 +1369,7 @@ re_start: if (c_debug) { - con->debug=1; + SSL_set_debug(con, 1); BIO_set_callback(sbio,bio_dump_callback); BIO_set_callback_arg(sbio,(char *)bio_c_out); } @@ -1649,6 +1903,14 @@ printf("read=%d pending=%d peek=%d\n",k,SSL_pending(con),SSL_peek(con,zbuf,10240 SSL_renegotiate(con); cbuf_len=0; } +#ifndef OPENSSL_NO_HEARTBEATS + else if ((!c_ign_eof) && (cbuf[0] == 'B')) + { + BIO_printf(bio_err,"HEARTBEATING\n"); + SSL_heartbeat(con); + cbuf_len=0; + } +#endif else { cbuf_len=i; @@ -1676,6 +1938,10 @@ end: print_stuff(bio_c_out,con,1); SSL_free(con); } +#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) + if (next_proto.data) + OPENSSL_free(next_proto.data); +#endif if (ctx != NULL) SSL_CTX_free(ctx); if (cert) X509_free(cert); @@ -1683,6 +1949,8 @@ end: EVP_PKEY_free(key); if (pass) OPENSSL_free(pass); + if (vpm) + X509_VERIFY_PARAM_free(vpm); if (cbuf != NULL) { OPENSSL_cleanse(cbuf,BUFSIZZ); OPENSSL_free(cbuf); } if (sbuf != NULL) { OPENSSL_cleanse(sbuf,BUFSIZZ); OPENSSL_free(sbuf); } if (mbuf != NULL) { OPENSSL_cleanse(mbuf,BUFSIZZ); OPENSSL_free(mbuf); } @@ -1710,6 +1978,7 @@ static void print_stuff(BIO *bio, SSL *s, int full) #ifndef OPENSSL_NO_COMP const COMP_METHOD *comp, *expansion; #endif + unsigned char *exportedkeymat; if (full) { @@ -1800,7 +2069,7 @@ static void print_stuff(BIO *bio, SSL *s, int full) BIO_number_read(SSL_get_rbio(s)), BIO_number_written(SSL_get_wbio(s))); } - BIO_printf(bio,((s->hit)?"---\nReused, ":"---\nNew, ")); + BIO_printf(bio,(SSL_cache_hit(s)?"---\nReused, ":"---\nNew, ")); c=SSL_get_current_cipher(s); BIO_printf(bio,"%s, Cipher is %s\n", SSL_CIPHER_get_version(c), @@ -1822,8 +2091,21 @@ static void print_stuff(BIO *bio, SSL *s, int full) BIO_printf(bio,"Expansion: %s\n", expansion ? SSL_COMP_get_name(expansion) : "NONE"); #endif + +#ifdef SSL_DEBUG + { + /* Print out local port of connection: useful for debugging */ + int sock; + struct sockaddr_in ladd; + socklen_t ladd_size = sizeof(ladd); + sock = SSL_get_fd(s); + getsockname(sock, (struct sockaddr *)&ladd, &ladd_size); + BIO_printf(bio_c_out, "LOCAL PORT is %u\n", ntohs(ladd.sin_port)); + } +#endif -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) if (next_proto.status != -1) { const unsigned char *proto; unsigned int proto_len; @@ -1832,9 +2114,60 @@ static void print_stuff(BIO *bio, SSL *s, int full) BIO_write(bio, proto, proto_len); BIO_write(bio, "\n", 1); } + { + const unsigned char *proto; + unsigned int proto_len; + SSL_get0_alpn_selected(s, &proto, &proto_len); + if (proto_len > 0) + { + BIO_printf(bio, "ALPN protocol: "); + BIO_write(bio, proto, proto_len); + BIO_write(bio, "\n", 1); + } + else + BIO_printf(bio, "No ALPN negotiated\n"); + } +# endif #endif +#ifndef OPENSSL_NO_SRTP + { + SRTP_PROTECTION_PROFILE *srtp_profile=SSL_get_selected_srtp_profile(s); + + if(srtp_profile) + BIO_printf(bio,"SRTP Extension negotiated, profile=%s\n", + srtp_profile->name); + } +#endif + SSL_SESSION_print(bio,SSL_get_session(s)); + if (keymatexportlabel != NULL) + { + BIO_printf(bio, "Keying material exporter:\n"); + BIO_printf(bio, " Label: '%s'\n", keymatexportlabel); + BIO_printf(bio, " Length: %i bytes\n", keymatexportlen); + exportedkeymat = OPENSSL_malloc(keymatexportlen); + if (exportedkeymat != NULL) + { + if (!SSL_export_keying_material(s, exportedkeymat, + keymatexportlen, + keymatexportlabel, + strlen(keymatexportlabel), + NULL, 0, 0)) + { + BIO_printf(bio, " Error\n"); + } + else + { + BIO_printf(bio, " Keying material: "); + for (i=0; i #endif +#ifndef OPENSSL_NO_SRP +#include +#endif #include "s_apps.h" #include "timeouts.h" @@ -290,6 +293,9 @@ static int cert_status_cb(SSL *s, void *arg); static int s_msg=0; static int s_quiet=0; +static char *keymatexportlabel=NULL; +static int keymatexportlen=20; + static int hack=0; #ifndef OPENSSL_NO_ENGINE static char *engine_id=NULL; @@ -302,6 +308,7 @@ static long socket_mtu; static int cert_chain = 0; #endif + #ifndef OPENSSL_NO_PSK static char *psk_identity="Client_identity"; char *psk_key=NULL; /* by default PSK is not used */ @@ -369,6 +376,52 @@ static unsigned int psk_server_cb(SSL *ssl, const char *identity, } #endif +#ifndef OPENSSL_NO_SRP +/* This is a context that we pass to callbacks */ +typedef struct srpsrvparm_st + { + char *login; + SRP_VBASE *vb; + SRP_user_pwd *user; + } srpsrvparm; + +/* This callback pretends to require some asynchronous logic in order to obtain + a verifier. When the callback is called for a new connection we return + with a negative value. This will provoke the accept etc to return with + an LOOKUP_X509. The main logic of the reinvokes the suspended call + (which would normally occur after a worker has finished) and we + set the user parameters. +*/ +static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) + { + srpsrvparm *p = (srpsrvparm *)arg; + if (p->login == NULL && p->user == NULL ) + { + p->login = SSL_get_srp_username(s); + BIO_printf(bio_err, "SRP username = \"%s\"\n", p->login); + return (-1) ; + } + + if (p->user == NULL) + { + BIO_printf(bio_err, "User %s doesn't exist\n", p->login); + return SSL3_AL_FATAL; + } + if (SSL_set_srp_server_param(s, p->user->N, p->user->g, p->user->s, p->user->v, + p->user->info) < 0) + { + *ad = SSL_AD_INTERNAL_ERROR; + return SSL3_AL_FATAL; + } + BIO_printf(bio_err, "SRP parameters set: username = \"%s\" info=\"%s\" \n", p->login,p->user->info); + /* need to check whether there are memory leaks */ + p->user = NULL; + p->login = NULL; + return SSL_ERROR_NONE; + } + +#endif + #ifdef MONOLITH static void s_server_init(void) { @@ -455,9 +508,15 @@ static void sv_usage(void) # ifndef OPENSSL_NO_JPAKE BIO_printf(bio_err," -jpake arg - JPAKE secret to use\n"); # endif +#endif +#ifndef OPENSSL_NO_SRP + BIO_printf(bio_err," -srpvfile file - The verifier file for SRP\n"); + BIO_printf(bio_err," -srpuserseed string - A seed string for a default user salt.\n"); #endif BIO_printf(bio_err," -ssl2 - Just talk SSLv2\n"); BIO_printf(bio_err," -ssl3 - Just talk SSLv3\n"); + BIO_printf(bio_err," -tls1_2 - Just talk TLSv1.2\n"); + BIO_printf(bio_err," -tls1_1 - Just talk TLSv1.1\n"); BIO_printf(bio_err," -tls1 - Just talk TLSv1\n"); BIO_printf(bio_err," -dtls1 - Just talk DTLSv1\n"); BIO_printf(bio_err," -timeout - Enable timeouts\n"); @@ -466,6 +525,8 @@ static void sv_usage(void) BIO_printf(bio_err," -no_ssl2 - Just disable SSLv2\n"); BIO_printf(bio_err," -no_ssl3 - Just disable SSLv3\n"); BIO_printf(bio_err," -no_tls1 - Just disable TLSv1\n"); + BIO_printf(bio_err," -no_tls1_1 - Just disable TLSv1.1\n"); + BIO_printf(bio_err," -no_tls1_2 - Just disable TLSv1.2\n"); #ifndef OPENSSL_NO_DH BIO_printf(bio_err," -no_dhe - Disable ephemeral DH\n"); #endif @@ -495,7 +556,12 @@ static void sv_usage(void) # ifndef OPENSSL_NO_NEXTPROTONEG BIO_printf(bio_err," -nextprotoneg arg - set the advertised protocols for the NPN extension (comma-separated list)\n"); # endif +# ifndef OPENSSL_NO_SRTP + BIO_printf(bio_err," -use_srtp profiles - Offer SRTP key management with a colon-separated profile list\n"); +# endif #endif + BIO_printf(bio_err," -keymatexport label - Export keying material using label\n"); + BIO_printf(bio_err," -keymatexportlen len - Export len bytes of keying material (default 20)\n"); } static int local_argc=0; @@ -846,7 +912,9 @@ static int next_proto_cb(SSL *s, const unsigned char **data, unsigned int *len, return SSL_TLSEXT_ERR_OK; } -# endif /* ndef OPENSSL_NO_NPN */ +# endif /* ndef OPENSSL_NO_NEXTPROTONEG */ + + #endif int MAIN(int, char **); @@ -854,6 +922,12 @@ int MAIN(int, char **); #ifndef OPENSSL_NO_JPAKE static char *jpake_secret = NULL; #endif +#ifndef OPENSSL_NO_SRP + static srpsrvparm srp_callback_parm; +#endif +#ifndef OPENSSL_NO_SRTP +static char *srtp_profiles = NULL; +#endif int MAIN(int argc, char *argv[]) { @@ -885,8 +959,6 @@ int MAIN(int argc, char *argv[]) #ifndef OPENSSL_NO_TLSEXT EVP_PKEY *s_key2 = NULL; X509 *s_cert2 = NULL; -#endif -#ifndef OPENSSL_NO_TLSEXT tlsextctx tlsextcbp = {NULL, NULL, SSL_TLSEXT_ERR_ALERT_WARNING}; # ifndef OPENSSL_NO_NEXTPROTONEG const char *next_proto_neg_in = NULL; @@ -897,13 +969,11 @@ int MAIN(int argc, char *argv[]) /* by default do not send a PSK identity hint */ static char *psk_identity_hint=NULL; #endif -#if !defined(OPENSSL_NO_SSL2) && !defined(OPENSSL_NO_SSL3) - meth=SSLv23_server_method(); -#elif !defined(OPENSSL_NO_SSL3) - meth=SSLv3_server_method(); -#elif !defined(OPENSSL_NO_SSL2) - meth=SSLv2_server_method(); +#ifndef OPENSSL_NO_SRP + char *srpuserseed = NULL; + char *srp_verifier_file = NULL; #endif + meth=SSLv23_server_method(); local_argc=argc; local_argv=argv; @@ -1128,12 +1198,26 @@ int MAIN(int argc, char *argv[]) psk_key=*(++argv); for (i=0; ikssl_ctx = kssl_ctx_new()) != NULL) + if ((kctx = kssl_ctx_new()) != NULL) { - kssl_ctx_setstring(con->kssl_ctx, KSSL_SERVICE, - KRB5SVC); - kssl_ctx_setstring(con->kssl_ctx, KSSL_KEYTAB, - KRB5KEYTAB); + SSL_set0_kssl_ctx(con, kctx); + kssl_ctx_setstring(kctx, KSSL_SERVICE, KRB5SVC); + kssl_ctx_setstring(kctx, KSSL_KEYTAB, KRB5KEYTAB); } #endif /* OPENSSL_NO_KRB5 */ if(context) @@ -1924,7 +2068,7 @@ static int sv_body(char *hostname, int s, unsigned char *context) if (s_debug) { - con->debug=1; + SSL_set_debug(con, 1); BIO_set_callback(SSL_get_rbio(con),bio_dump_callback); BIO_set_callback_arg(SSL_get_rbio(con),(char *)bio_s_out); } @@ -2053,6 +2197,16 @@ static int sv_body(char *hostname, int s, unsigned char *context) goto err; } +#ifndef OPENSSL_NO_HEARTBEATS + if ((buf[0] == 'B') && + ((buf[1] == '\n') || (buf[1] == '\r'))) + { + BIO_printf(bio_err,"HEARTBEATING\n"); + SSL_heartbeat(con); + i=0; + continue; + } +#endif if ((buf[0] == 'r') && ((buf[1] == '\n') || (buf[1] == '\r'))) { @@ -2096,6 +2250,18 @@ static int sv_body(char *hostname, int s, unsigned char *context) { static count=0; if (++count == 100) { count=0; SSL_renegotiate(con); } } #endif k=SSL_write(con,&(buf[l]),(unsigned int)i); +#ifndef OPENSSL_NO_SRP + while (SSL_get_error(con,k) == SSL_ERROR_WANT_X509_LOOKUP) + { + BIO_printf(bio_s_out,"LOOKUP renego during write\n"); + srp_callback_parm.user = SRP_VBASE_get_by_user(srp_callback_parm.vb, srp_callback_parm.login); + if (srp_callback_parm.user) + BIO_printf(bio_s_out,"LOOKUP done %s\n",srp_callback_parm.user->info); + else + BIO_printf(bio_s_out,"LOOKUP not successful\n"); + k=SSL_write(con,&(buf[l]),(unsigned int)i); + } +#endif switch (SSL_get_error(con,k)) { case SSL_ERROR_NONE: @@ -2143,6 +2309,18 @@ static int sv_body(char *hostname, int s, unsigned char *context) { again: i=SSL_read(con,(char *)buf,bufsize); +#ifndef OPENSSL_NO_SRP + while (SSL_get_error(con,i) == SSL_ERROR_WANT_X509_LOOKUP) + { + BIO_printf(bio_s_out,"LOOKUP renego during read\n"); + srp_callback_parm.user = SRP_VBASE_get_by_user(srp_callback_parm.vb, srp_callback_parm.login); + if (srp_callback_parm.user) + BIO_printf(bio_s_out,"LOOKUP done %s\n",srp_callback_parm.user->info); + else + BIO_printf(bio_s_out,"LOOKUP not successful\n"); + i=SSL_read(con,(char *)buf,bufsize); + } +#endif switch (SSL_get_error(con,i)) { case SSL_ERROR_NONE: @@ -2155,7 +2333,6 @@ again: break; case SSL_ERROR_WANT_WRITE: case SSL_ERROR_WANT_READ: - case SSL_ERROR_WANT_X509_LOOKUP: BIO_printf(bio_s_out,"Read BLOCK\n"); break; case SSL_ERROR_SYSCALL: @@ -2210,12 +2387,30 @@ static int init_ssl_connection(SSL *con) X509 *peer; long verify_error; MS_STATIC char buf[BUFSIZ]; +#ifndef OPENSSL_NO_KRB5 + char *client_princ; +#endif #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) const unsigned char *next_proto_neg; unsigned next_proto_neg_len; #endif + unsigned char *exportedkeymat; + - if ((i=SSL_accept(con)) <= 0) + i=SSL_accept(con); +#ifndef OPENSSL_NO_SRP + while (i <= 0 && SSL_get_error(con,i) == SSL_ERROR_WANT_X509_LOOKUP) + { + BIO_printf(bio_s_out,"LOOKUP during accept %s\n",srp_callback_parm.login); + srp_callback_parm.user = SRP_VBASE_get_by_user(srp_callback_parm.vb, srp_callback_parm.login); + if (srp_callback_parm.user) + BIO_printf(bio_s_out,"LOOKUP done %s\n",srp_callback_parm.user->info); + else + BIO_printf(bio_s_out,"LOOKUP not successful\n"); + i=SSL_accept(con); + } +#endif + if (i <= 0) { if (BIO_sock_should_retry(i)) { @@ -2253,6 +2448,7 @@ static int init_ssl_connection(SSL *con) BIO_printf(bio_s_out,"Shared ciphers:%s\n",buf); str=SSL_CIPHER_get_name(SSL_get_current_cipher(con)); BIO_printf(bio_s_out,"CIPHER is %s\n",(str != NULL)?str:"(NONE)"); + #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) SSL_get0_next_proto_negotiated(con, &next_proto_neg, &next_proto_neg_len); if (next_proto_neg) @@ -2262,19 +2458,60 @@ static int init_ssl_connection(SSL *con) BIO_printf(bio_s_out, "\n"); } #endif - if (con->hit) BIO_printf(bio_s_out,"Reused session-id\n"); +#ifndef OPENSSL_NO_SRTP + { + SRTP_PROTECTION_PROFILE *srtp_profile + = SSL_get_selected_srtp_profile(con); + + if(srtp_profile) + BIO_printf(bio_s_out,"SRTP Extension negotiated, profile=%s\n", + srtp_profile->name); + } +#endif + if (SSL_cache_hit(con)) BIO_printf(bio_s_out,"Reused session-id\n"); if (SSL_ctrl(con,SSL_CTRL_GET_FLAGS,0,NULL) & TLS1_FLAGS_TLS_PADDING_BUG) - BIO_printf(bio_s_out,"Peer has incorrect TLSv1 block padding\n"); + BIO_printf(bio_s_out, + "Peer has incorrect TLSv1 block padding\n"); #ifndef OPENSSL_NO_KRB5 - if (con->kssl_ctx->client_princ != NULL) + client_princ = kssl_ctx_get0_client_princ(SSL_get0_kssl_ctx(con)); + if (client_princ != NULL) { BIO_printf(bio_s_out,"Kerberos peer principal is %s\n", - con->kssl_ctx->client_princ); + client_princ); } #endif /* OPENSSL_NO_KRB5 */ BIO_printf(bio_s_out, "Secure Renegotiation IS%s supported\n", SSL_get_secure_renegotiation_support(con) ? "" : " NOT"); + if (keymatexportlabel != NULL) + { + BIO_printf(bio_s_out, "Keying material exporter:\n"); + BIO_printf(bio_s_out, " Label: '%s'\n", keymatexportlabel); + BIO_printf(bio_s_out, " Length: %i bytes\n", + keymatexportlen); + exportedkeymat = OPENSSL_malloc(keymatexportlen); + if (exportedkeymat != NULL) + { + if (!SSL_export_keying_material(con, exportedkeymat, + keymatexportlen, + keymatexportlabel, + strlen(keymatexportlabel), + NULL, 0, 0)) + { + BIO_printf(bio_s_out, " Error\n"); + } + else + { + BIO_printf(bio_s_out, " Keying material: "); + for (i=0; ikssl_ctx = kssl_ctx_new()) != NULL) + if ((kctx = kssl_ctx_new()) != NULL) { - kssl_ctx_setstring(con->kssl_ctx, KSSL_SERVICE, KRB5SVC); - kssl_ctx_setstring(con->kssl_ctx, KSSL_KEYTAB, KRB5KEYTAB); + kssl_ctx_setstring(kctx, KSSL_SERVICE, KRB5SVC); + kssl_ctx_setstring(kctx, KSSL_KEYTAB, KRB5KEYTAB); } #endif /* OPENSSL_NO_KRB5 */ if(context) SSL_set_session_id_context(con, context, @@ -2382,7 +2625,7 @@ static int www_body(char *hostname, int s, unsigned char *context) if (s_debug) { - con->debug=1; + SSL_set_debug(con, 1); BIO_set_callback(SSL_get_rbio(con),bio_dump_callback); BIO_set_callback_arg(SSL_get_rbio(con),(char *)bio_s_out); } @@ -2397,7 +2640,18 @@ static int www_body(char *hostname, int s, unsigned char *context) if (hack) { i=SSL_accept(con); - +#ifndef OPENSSL_NO_SRP + while (i <= 0 && SSL_get_error(con,i) == SSL_ERROR_WANT_X509_LOOKUP) + { + BIO_printf(bio_s_out,"LOOKUP during accept %s\n",srp_callback_parm.login); + srp_callback_parm.user = SRP_VBASE_get_by_user(srp_callback_parm.vb, srp_callback_parm.login); + if (srp_callback_parm.user) + BIO_printf(bio_s_out,"LOOKUP done %s\n",srp_callback_parm.user->info); + else + BIO_printf(bio_s_out,"LOOKUP not successful\n"); + i=SSL_accept(con); + } +#endif switch (SSL_get_error(con,i)) { case SSL_ERROR_NONE: @@ -2465,6 +2719,11 @@ static int www_body(char *hostname, int s, unsigned char *context) } BIO_puts(io,"\n"); + BIO_printf(io, + "Secure Renegotiation IS%s supported\n", + SSL_get_secure_renegotiation_support(con) ? + "" : " NOT"); + /* The following is evil and should not really * be done */ BIO_printf(io,"Ciphers supported in s_server binary\n"); @@ -2503,7 +2762,7 @@ static int www_body(char *hostname, int s, unsigned char *context) } BIO_puts(io,"\n"); } - BIO_printf(io,((con->hit) + BIO_printf(io,(SSL_cache_hit(con) ?"---\nReused, " :"---\nNew, ")); c=SSL_get_current_cipher(con); diff --git a/app/openssl/apps/s_socket.c b/app/openssl/apps/s_socket.c index c08544a1..380efdb1 100644 --- a/app/openssl/apps/s_socket.c +++ b/app/openssl/apps/s_socket.c @@ -238,11 +238,10 @@ int init_client(int *sock, char *host, int port, int type) { unsigned char ip[4]; + memset(ip, '\0', sizeof ip); if (!host_ip(host,&(ip[0]))) - { - return(0); - } - return(init_client_ip(sock,ip,port,type)); + return 0; + return init_client_ip(sock,ip,port,type); } static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) diff --git a/app/openssl/apps/server.pem b/app/openssl/apps/server.pem index 56248e57..d0fc265f 100644 --- a/app/openssl/apps/server.pem +++ b/app/openssl/apps/server.pem @@ -1,369 +1,52 @@ -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) -subject= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Server test cert (512 bit) ------BEGIN CERTIFICATE----- -MIIB6TCCAVICAQYwDQYJKoZIhvcNAQEEBQAwWzELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYD -VQQDExJUZXN0IENBICgxMDI0IGJpdCkwHhcNMDAxMDE2MjIzMTAzWhcNMDMwMTE0 -MjIzMTAzWjBjMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDEaMBgG -A1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxIzAhBgNVBAMTGlNlcnZlciB0ZXN0IGNl -cnQgKDUxMiBiaXQpMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBAJ+zw4Qnlf8SMVIP -Fe9GEcStgOY2Ww/dgNdhjeD8ckUJNP5VZkVDTGiXav6ooKXfX3j/7tdkuD8Ey2// -Kv7+ue0CAwEAATANBgkqhkiG9w0BAQQFAAOBgQCT0grFQeZaqYb5EYfk20XixZV4 -GmyAbXMftG1Eo7qGiMhYzRwGNWxEYojf5PZkYZXvSqZ/ZXHXa4g59jK/rJNnaVGM -k+xIX8mxQvlV0n5O9PIha5BX5teZnkHKgL8aKKLKW1BK7YTngsfSzzaeame5iKfz -itAE+OjGF+PFKbwX8Q== ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIIBPAIBAAJBAJ+zw4Qnlf8SMVIPFe9GEcStgOY2Ww/dgNdhjeD8ckUJNP5VZkVD -TGiXav6ooKXfX3j/7tdkuD8Ey2//Kv7+ue0CAwEAAQJAN6W31vDEP2DjdqhzCDDu -OA4NACqoiFqyblo7yc2tM4h4xMbC3Yx5UKMN9ZkCtX0gzrz6DyF47bdKcWBzNWCj -gQIhANEoojVt7hq+SQ6MCN6FTAysGgQf56Q3TYoJMoWvdiXVAiEAw3e3rc+VJpOz -rHuDo6bgpjUAAXM+v3fcpsfZSNO6V7kCIQCtbVjanpUwvZkMI9by02oUk9taki3b -PzPfAfNPYAbCJQIhAJXNQDWyqwn/lGmR11cqY2y9nZ1+5w3yHGatLrcDnQHxAiEA -vnlEGo8K85u+KwIOimM48ZG8oTk7iFdkqLJR1utT3aU= ------END RSA PRIVATE KEY----- -subject=/C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -issuer= /C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -notBefore=950413210656Z -notAfter =970412210656Z ------BEGIN X509 CERTIFICATE----- - -MIICCDCCAXECAQAwDQYJKoZIhvcNAQEEBQAwTjELMAkGA1UEBhMCVVMxHzAdBgNV -BAoUFkFUJlQgQmVsbCBMYWJvcmF0b3JpZXMxHjAcBgNVBAsUFVByb3RvdHlwZSBS -ZXNlYXJjaCBDQTAeFw05NTA0MTMyMTA2NTZaFw05NzA0MTIyMTA2NTZaME4xCzAJ -BgNVBAYTAlVTMR8wHQYDVQQKFBZBVCZUIEJlbGwgTGFib3JhdG9yaWVzMR4wHAYD -VQQLFBVQcm90b3R5cGUgUmVzZWFyY2ggQ0EwgZwwDQYJKoZIhvcNAQEBBQADgYoA -MIGGAoGAebOmgtSCl+wCYZc86UGYeTLY8cjmW2P0FN8ToT/u2pECCoFdrlycX0OR -3wt0ZhpFXLVNeDnHwEE9veNUih7pCL2ZBFqoIoQkB1lZmXRiVtjGonz8BLm/qrFM -YHb0lme/Ol+s118mwKVxnn6bSAeI/OXKhLaVdYZWk+aEaxEDkVkCAQ8wDQYJKoZI -hvcNAQEEBQADgYEAAZMG14lZmZ8bahkaHaTV9dQf4p2FZiQTFwHP9ZyGsXPC+LT5 -dG5iTaRmyjNIJdPWohZDl97kAci79aBndvuEvRKOjLHs3WRGBIwERnAcnY9Mz8u/ -zIHK23PjYVxGGaZd669OJwD0CYyqH22HH9nFUGaoJdsv39ChW0NRdLE9+y8= ------END X509 CERTIFICATE----- -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJjCCAY8CAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTc0M1oXDTAxMDYw -OTEzNTc0M1owWzELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYDVQQDExJUZXN0IENBICgxMDI0 -IGJpdCkwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAKO7o8t116VP6cgybTsZ -DCZhr95nYlZuya3aCi1IKoztqwWnjbmDFIriOqGFPrZQ+moMETC9D59iRW/dFXSv -1F65ka/XY2hLh9exCCo7XuUcDs53Qp3bI3AmMqHjgzE8oO3ajyJAzJkTTOUecQU2 -mw/gI4tMM0LqWMQS7luTy4+xAgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAM7achv3v -hLQJcv/65eGEpBXM40ZDVoFQFFJWaY5p883HTqLB1x4FdzsXHH0QKBTcKpWwqyu4 -YDm3fb8oDugw72bCzfyZK/zVZPR/hVlqI/fvU109Qoc+7oPvIXWky71HfcK6ZBCA -q30KIqGM/uoM60INq97qjDmCJapagcNBGQs= ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIICXQIBAAKBgQCju6PLddelT+nIMm07GQwmYa/eZ2JWbsmt2gotSCqM7asFp425 -gxSK4jqhhT62UPpqDBEwvQ+fYkVv3RV0r9ReuZGv12NoS4fXsQgqO17lHA7Od0Kd -2yNwJjKh44MxPKDt2o8iQMyZE0zlHnEFNpsP4COLTDNC6ljEEu5bk8uPsQIDAQAB -AoGAVZmpFZsDZfr0l2S9tLLwpjRWNOlKATQkno6q2WesT0eGLQufTciY+c8ypfU6 -hyio8r5iUl/VhhdjhAtKx1mRpiotftHo/eYf8rtsrnprOnWG0bWjLjtIoMbcxGn2 -J3bN6LJmbJMjDs0eJ3KnTu646F3nDUw2oGAwmpzKXA1KAP0CQQDRvQhxk2D3Pehs -HvG665u2pB5ipYQngEFlZO7RHJZzJOZEWSLuuMqaF/7pTfA5jiBvWqCgJeCRRInL -21ru4dlPAkEAx9jj7BgKn5TYnMoBSSe0afjsV9oApVpN1Nacb1YDtCwy+scp3++s -nFxlv98wxIlSdpwMUn+AUWfjiWR7Tu/G/wJBAJ/KjwZIrFVxewP0x2ILYsTRYLzz -MS4PDsO7FB+I0i7DbBOifXS2oNSpd3I0CNMwrxFnUHzynpbOStVfN3ZL5w0CQQCa -pwFahxBRhkJKsxhjoFJBX9yl75JoY4Wvm5Tbo9ih6UJaRx3kqfkN14L2BKYcsZgb -KY9vmDOYy6iNfjDeWTfJAkBkfPUb8oTJ/nSP5zN6sqGxSY4krc4xLxpRmxoJ8HL2 -XfhqXkTzbU13RX9JJ/NZ8vQN9Vm2NhxRGJocQkmcdVtJ ------END RSA PRIVATE KEY----- ------BEGIN X509 CERTIFICATE----- -MIICYDCCAiACAgEoMAkGBSsOAwINBQAwfDELMAkGA1UEBhMCVVMxNjA0BgNVBAoT -LU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFuZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZ -MBcGA1UECxMQVGVzdCBFbnZpcm9ubWVudDEaMBgGA1UECxMRRFNTLU5BU0EtUGls -b3QtQ0EwHhcNOTYwMjI2MTYzMjQ1WhcNOTcwMjI1MTYzMjQ1WjB8MQswCQYDVQQG -EwJVUzE2MDQGA1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFk -bWluaXN0cmF0aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MRowGAYDVQQL -ExFEU1MtTkFTQS1QaWxvdC1DQTCB8jAJBgUrDgMCDAUAA4HkADCB4AJBAMA/ssKb -hPNUG7ZlASfVwEJU21O5OyF/iyBzgHI1O8eOhJGUYO8cc8wDMjR508Mr9cp6Uhl/ -ZB7FV5GkLNEnRHYCQQDUEaSg45P2qrDwixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLb -bn3QK74T2IxY1yY+kCNq8XrIqf5fJJzIH0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3 -fVd0geUCQQCzCFUQAh+ZkEmp5804cs6ZWBhrUAfnra8lJItYo9xPcXgdIfLfibcX -R71UsyO77MRD7B0+Ag2tq794IleCVcEEMAkGBSsOAwINBQADLwAwLAIUUayDfreR -Yh2WeU86/pHNdkUC1IgCFEfxe1f0oMpxJyrJ5XIxTi7vGdoK ------END X509 CERTIFICATE----- ------BEGIN X509 CERTIFICATE----- - -MIICGTCCAdgCAwCqTDAJBgUrDgMCDQUAMHwxCzAJBgNVBAYTAlVTMTYwNAYDVQQK -Ey1OYXRpb25hbCBBZXJvbmF1dGljcyBhbmQgU3BhY2UgQWRtaW5pc3RyYXRpb24x -GTAXBgNVBAsTEFRlc3QgRW52aXJvbm1lbnQxGjAYBgNVBAsTEURTUy1OQVNBLVBp -bG90LUNBMB4XDTk2MDUxNDE3MDE0MVoXDTk3MDUxNDE3MDE0MVowMzELMAkGA1UE -BhMCQVUxDzANBgNVBAoTBk1pbmNvbTETMBEGA1UEAxMKRXJpYyBZb3VuZzCB8jAJ -BgUrDgMCDAUAA4HkADCB4AJBAKbfHz6vE6pXXMTpswtGUec2tvnfLJUsoxE9qs4+ -ObZX7LmLvragNPUeiTJx7UOWZ5DfBj6bXLc8eYne0lP1g3ACQQDUEaSg45P2qrDw -ixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLbbn3QK74T2IxY1yY+kCNq8XrIqf5fJJzI -H0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3fVd0geUCQQCzCFUQAh+ZkEmp5804cs6Z -WBhrUAfnra8lJItYo9xPcXgdIfLfibcXR71UsyO77MRD7B0+Ag2tq794IleCVcEE -MAkGBSsOAwINBQADMAAwLQIUWsuuJRE3VT4ueWkWMAJMJaZjj1ECFQCYY0zX4bzM -LC7obsrHD8XAHG+ZRG== ------END X509 CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICTTCCAbagAwIBAgIBADANBgkqhkiG9w0BAQQFADBMMQswCQYDVQQGEwJHQjEM -MAoGA1UEChMDVUNMMRgwFgYDVQQLEw9JQ0UtVEVMIFByb2plY3QxFTATBgNVBAMT -DFRydXN0RmFjdG9yeTAeFw05NzA0MjIxNDM5MTRaFw05ODA0MjIxNDM5MTRaMEwx -CzAJBgNVBAYTAkdCMQwwCgYDVQQKEwNVQ0wxGDAWBgNVBAsTD0lDRS1URUwgUHJv -amVjdDEVMBMGA1UEAxMMVHJ1c3RGYWN0b3J5MIGcMAoGBFUIAQECAgQAA4GNADCB -iQKBgQCEieR8NcXkUW1f0G6aC6u0i8q/98JqS6RxK5YmHIGKCkuTWAUjzLfUa4dt -U9igGCjTuxaDqlzEim+t/02pmiBZT9HaX++35MjQPUWmsChcYU5WyzGErXi+rQaw -zlwS73zM8qiPj/97lXYycWhgL0VaiDSPxRXEUdWoaGruom4mNQIDAQABo0IwQDAd -BgNVHQ4EFgQUHal1LZr7oVg5z6lYzrhTgZRCmcUwDgYDVR0PAQH/BAQDAgH2MA8G -A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAfaggfl6FZoioecjv0dq8 -/DXo/u11iMZvXn08gjX/zl2b4wtPbShOSY5FhkSm8GeySasz+/Nwb/uzfnIhokWi -lfPZHtlCWtXbIy/TN51eJyq04ceDCQDWvLC2enVg9KB+GJ34b5c5VaPRzq8MBxsA -S7ELuYGtmYgYm9NZOIr7yU0= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIIB6jCCAZQCAgEtMA0GCSqGSIb3DQEBBAUAMIGAMQswCQYDVQQGEwJVUzE2MDQG -A1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFkbWluaXN0cmF0 -aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MR4wHAYDVQQLExVNRDUtUlNB -LU5BU0EtUGlsb3QtQ0EwHhcNOTYwNDMwMjIwNTAwWhcNOTcwNDMwMjIwNTAwWjCB -gDELMAkGA1UEBhMCVVMxNjA0BgNVBAoTLU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFu -ZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZMBcGA1UECxMQVGVzdCBFbnZpcm9ubWVu -dDEeMBwGA1UECxMVTUQ1LVJTQS1OQVNBLVBpbG90LUNBMFkwCgYEVQgBAQICAgAD -SwAwSAJBALmmX5+GqAvcrWK13rfDrNX9UfeA7f+ijyBgeFQjYUoDpFqapw4nzQBL -bAXug8pKkRwa2Zh8YODhXsRWu2F/UckCAwEAATANBgkqhkiG9w0BAQQFAANBAH9a -OBA+QCsjxXgnSqHx04gcU8S49DVUb1f2XVoLnHlIb8RnX0k5O6mpHT5eti9bLkiW -GJNMJ4L0AJ/ac+SmHZc= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICajCCAdMCBDGA0QUwDQYJKoZIhvcNAQEEBQAwfTELMAkGA1UEBhMCQ2ExDzAN -BgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmlsaXR5IEFjY2VwdGVkMR8w -HQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRwwGgYDVQQDExNFbnRydXN0 -IERlbW8gV2ViIENBMB4XDTk2MDQyNjEzMzUwMVoXDTA2MDQyNjEzMzUwMVowfTEL -MAkGA1UEBhMCQ2ExDzANBgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmls -aXR5IEFjY2VwdGVkMR8wHQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRww -GgYDVQQDExNFbnRydXN0IERlbW8gV2ViIENBMIGdMA0GCSqGSIb3DQEBAQUAA4GL -ADCBhwKBgQCaroS7O1DA0hm4IefNYU1cx/nqOmzEnk291d1XqznDeF4wEgakbkCc -zTKxK791yNpXG5RmngqH7cygDRTHZJ6mfCRn0wGC+AI00F2vYTGqPGRQL1N3lZT0 -YDKFC0SQeMMjFIZ1aeQigroFQnHo0VB3zWIMpNkka8PY9lxHZAmWwQIBAzANBgkq -hkiG9w0BAQQFAAOBgQBAx0UMVA1s54lMQyXjMX5kj99FJN5itb8bK1Rk+cegPQPF -cWO9SEWyEjjBjIkjjzAwBkaEszFsNGxemxtXvwjIm1xEUMTVlPEWTs2qnDvAUA9W -YqhWbhH0toGT36236QAsqCZ76rbTRVSSX2BHyJwJMG2tCRv7kRJ//NIgxj3H4w== ------END CERTIFICATE----- - -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJzCCAZACAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTczN1oXDTAxMDYw -OTEzNTczN1owXDELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYDVQQDExNUZXN0IFBDQSAoMTAy -NCBiaXQpMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCdoWk/3+WcMlfjIrkg -40ketmnQaEogQe1LLcuOJV6rKfUSAsPgwgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp -22Jp85PmemiDzyUIStwk72qhp1imbANZvlmlCFKiQrjUyuDfu4TABmn+kkt3vR1Y -BEOGt+IFye1UBVSATVdRJ2UVhwIDAQABMA0GCSqGSIb3DQEBBAUAA4GBABNA1u/S -Cg/LJZWb7GliiKJsvuhxlE4E5JxQF2zMub/CSNbF97//tYSyj96sxeFQxZXbcjm9 -xt6mr/xNLA4szNQMJ4P+L7b5e/jC5DSqlwS+CUYJgaFs/SP+qJoCSu1bR3IM9XWO -cRBpDmcBbYLkSyB92WURvsZ1LtjEcn+cdQVI +subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert +issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA +-----BEGIN CERTIFICATE----- +MIID5zCCAs+gAwIBAgIJALnu1NlVpZ6zMA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT +VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt +ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZDELMAkG +A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU +RVNUSU5HIFBVUlBPU0VTIE9OTFkxGTAXBgNVBAMMEFRlc3QgU2VydmVyIENlcnQw +ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDzhPOSNtyyRspmeuUpxfNJ +KCLTuf7g3uQ4zu4iHOmRO5TQci+HhVlLZrHF9XqFXcIP0y4pWDbMSGuiorUmzmfi +R7bfSdI/+qIQt8KXRH6HNG1t8ou0VSvWId5TS5Dq/er5ODUr9OaaDva7EquHIcMv +vPQGuI+OEAcnleVCy9HVEIySrO4P3CNIicnGkwwiAud05yUAq/gPXBC1hTtmlPD7 +TVcGVSEiJdvzqqlgv02qedGrkki6GY4S7GjZxrrf7Foc2EP+51LJzwLQx3/JfrCU +41NEWAsu/Sl0tQabXESN+zJ1pDqoZ3uHMgpQjeGiE0olr+YcsSW/tJmiU9OiAr8R +AgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJYIZI +AYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQW +BBSCvM8AABPR9zklmifnr9LvIBturDAfBgNVHSMEGDAWgBQ2w2yI55X+sL3szj49 +hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEAqb1NV0B0/pbpK9Z4/bNjzPQLTRLK +WnSNm/Jh5v0GEUOE/Beg7GNjNrmeNmqxAlpqWz9qoeoFZax+QBpIZYjROU3TS3fp +yLsrnlr0CDQ5R7kCCDGa8dkXxemmpZZLbUCpW2Uoy8sAA4JjN9OtsZY7dvUXFgJ7 +vVNTRnI01ghknbtD+2SxSQd3CWF6QhcRMAzZJ1z1cbbwGDDzfvGFPzJ+Sq+zEPds +xoVLLSetCiBc+40ZcDS5dV98h9XD7JMTQfxzA7mNGv73JoZJA6nFgj+ADSlJsY/t +JBv+z1iQRueoh9Qeee+ZbRifPouCB8FDx+AltvHTANdAq0t/K3o+pplMVA== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQCdoWk/3+WcMlfjIrkg40ketmnQaEogQe1LLcuOJV6rKfUSAsPg -wgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp22Jp85PmemiDzyUIStwk72qhp1imbANZ -vlmlCFKiQrjUyuDfu4TABmn+kkt3vR1YBEOGt+IFye1UBVSATVdRJ2UVhwIDAQAB -AoGAba4fTtuap5l7/8ZsbE7Z1O32KJY4ZcOZukLOLUUhXxXduT+FTgGWujc0/rgc -z9qYCLlNZHOouMYTgtSfYvuMuLZ11VIt0GYH+nRioLShE59Yy+zCRyC+gPigS1kz -xvo14AsOIPYV14Tk/SsHyq6E0eTk7VzaIE197giiINUERPECQQDSKmtPTh/lRKw7 -HSZSM0I1mFWn/1zqrAbontRQY5w98QWIOe5qmzYyFbPXYT3d9BzlsMyhgiRNoBbD -yvohSHXJAkEAwAHx6ezAZeWWzD5yXD36nyjpkVCw7Tk7TSmOceLJMWt1QcrCfqlS -xA5jjpQ6Z8suU5DdtWAryM2sAir1WisYzwJAd6Zcx56jvAQ3xcPXsE6scBTVFzrj -7FqZ6E+cclPzfLQ+QQsyOBE7bpI6e/FJppY26XGZXo3YGzV8IGXrt40oOQJALETG -h86EFXo3qGOFbmsDy4pdP5nBERCu8X1xUCSfintiD4c2DInxgS5oGclnJeMcjTvL -QjQoJCX3UJCi/OUO1QJBAKgcDHWjMvt+l1pjJBsSEZ0HX9AAIIVx0RQmbFGS+F2Q -hhu5l77WnnZOQ9vvhV5u7NPCUF9nhU3jh60qWWO8mkc= +MIIEpAIBAAKCAQEA84TzkjbcskbKZnrlKcXzSSgi07n+4N7kOM7uIhzpkTuU0HIv +h4VZS2axxfV6hV3CD9MuKVg2zEhroqK1Js5n4ke230nSP/qiELfCl0R+hzRtbfKL +tFUr1iHeU0uQ6v3q+Tg1K/Tmmg72uxKrhyHDL7z0BriPjhAHJ5XlQsvR1RCMkqzu +D9wjSInJxpMMIgLndOclAKv4D1wQtYU7ZpTw+01XBlUhIiXb86qpYL9NqnnRq5JI +uhmOEuxo2ca63+xaHNhD/udSyc8C0Md/yX6wlONTRFgLLv0pdLUGm1xEjfsydaQ6 +qGd7hzIKUI3hohNKJa/mHLElv7SZolPTogK/EQIDAQABAoIBAADq9FwNtuE5IRQn +zGtO4q7Y5uCzZ8GDNYr9RKp+P2cbuWDbvVAecYq2NV9QoIiWJOAYZKklOvekIju3 +r0UZLA0PRiIrTg6NrESx3JrjWDK8QNlUO7CPTZ39/K+FrmMkV9lem9yxjJjyC34D +AQB+YRTx+l14HppjdxNwHjAVQpIx/uO2F5xAMuk32+3K+pq9CZUtrofe1q4Agj9R +5s8mSy9pbRo9kW9wl5xdEotz1LivFOEiqPUJTUq5J5PeMKao3vdK726XI4Z455Nm +W2/MA0YV0ug2FYinHcZdvKM6dimH8GLfa3X8xKRfzjGjTiMSwsdjgMa4awY3tEHH +674jhAECgYEA/zqMrc0zsbNk83sjgaYIug5kzEpN4ic020rSZsmQxSCerJTgNhmg +utKSCt0Re09Jt3LqG48msahX8ycqDsHNvlEGPQSbMu9IYeO3Wr3fAm75GEtFWePY +BhM73I7gkRt4s8bUiUepMG/wY45c5tRF23xi8foReHFFe9MDzh8fJFECgYEA9EFX +4qAik1pOJGNei9BMwmx0I0gfVEIgu0tzeVqT45vcxbxr7RkTEaDoAG6PlbWP6D9a +WQNLp4gsgRM90ZXOJ4up5DsAWDluvaF4/omabMA+MJJ5kGZ0gCj5rbZbKqUws7x8 +bp+6iBfUPJUbcqNqFmi/08Yt7vrDnMnyMw2A/sECgYEAiiuRMxnuzVm34hQcsbhH +6ymVqf7j0PW2qK0F4H1ocT9qhzWFd+RB3kHWrCjnqODQoI6GbGr/4JepHUpre1ex +4UEN5oSS3G0ru0rC3U4C59dZ5KwDHFm7ffZ1pr52ljfQDUsrjjIMRtuiwNK2OoRa +WSsqiaL+SDzSB+nBmpnAizECgYBdt/y6rerWUx4MhDwwtTnel7JwHyo2MDFS6/5g +n8qC2Lj6/fMDRE22w+CA2esp7EJNQJGv+b27iFpbJEDh+/Lf5YzIT4MwVskQ5bYB +JFcmRxUVmf4e09D7o705U/DjCgMH09iCsbLmqQ38ONIRSHZaJtMDtNTHD1yi+jF+ +OT43gQKBgQC/2OHZoko6iRlNOAQ/tMVFNq7fL81GivoQ9F1U0Qr+DH3ZfaH8eIkX +xT0ToMPJUzWAn8pZv0snA0um6SIgvkCuxO84OkANCVbttzXImIsL7pFzfcwV/ERK +UM6j0ZuSMFOCr/lGPAoOQU0fskidGEHi1/kW+suSr28TqsyYZpwBDQ== -----END RSA PRIVATE KEY----- -subject=/C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -notBefore=941104185834Z -notAfter =991103185834Z ------BEGIN X509 CERTIFICATE----- - -MIICIzCCAZACBQJBAAAWMA0GCSqGSIb3DQEBAgUAMFwxCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVy -Y2lhbCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDQxODU4MzRaFw05 -OTExMDMxODU4MzRaMFwxCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0YSBT -ZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVyY2lhbCBDZXJ0aWZpY2F0aW9u -IEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCk+4Fie84QJ93o -975sbsZwmdu41QUDaSiCnHJ/lj+O7Kwpkj+KFPhCdr69XQO5kNTQvAayUTNfxMK/ -touPmbZiImDd298ggrTKoi8tUO2UMt7gVY3UaOLgTNLNBRYulWZcYVI4HlGogqHE -7yXpCuaLK44xZtn42f29O2nZ6wIDAQABMA0GCSqGSIb3DQEBAgUAA34AdrW2EP4j -9/dZYkuwX5zBaLxJu7NJbyFHXSudVMQAKD+YufKKg5tgf+tQx6sFEC097TgCwaVI -0v5loMC86qYjFmZsGySp8+x5NRhPJsjjr1BKx6cxa9B8GJ1Qv6km+iYrRpwUqbtb -MJhCKLVLU7tDCZJAuqiqWqTGtotXTcU= ------END X509 CERTIFICATE----- -subject=/C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -notBefore=941109235417Z -notAfter =991231235417Z ------BEGIN X509 CERTIFICATE----- - -MIICKTCCAZYCBQJBAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJl -IFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDkyMzU0MTda -Fw05OTEyMzEyMzU0MTdaMF8xCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0 -YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJlIFNlcnZlciBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCSznrB -roM+WqqJg1esJQF2DK2ujiw3zus1eGRUA+WEQFHJv48I4oqCCNIWhjdV6bEhAq12 -aIGaBaJLyUslZiJWbIgHj/eBWW2EB2VwE3F2Ppt3TONQiVaYSLkdpykaEy5KEVmc -HhXVSVQsczppgrGXOZxtcGdI5d0t1sgeewIDAQABMA0GCSqGSIb3DQEBAgUAA34A -iNHReSHO4ovo+MF9NFM/YYPZtgs4F7boviGNjwC4i1N+RGceIr2XJ+CchcxK9oU7 -suK+ktPlDemvXA4MRpX/oRxePug2WHpzpgr4IhFrwwk4fia7c+8AvQKk8xQNMD9h -cHsg/jKjn7P0Z1LctO6EjJY2IN6BCINxIYoPnqk= ------END X509 CERTIFICATE----- -subject=/C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com -issuer= /C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com ------BEGIN CERTIFICATE----- -MIIC+TCCAmICAQAwDQYJKoZIhvcNAQEEBQAwgcQxCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkq -hkiG9w0BCQEWF3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMB4XDTk2MDcyNzE4MDc1 -N1oXDTk4MDcyNzE4MDc1N1owgcQxCzAJBgNVBAYTAlpBMRUwEwYDVQQIEwxXZXN0 -ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMUVGhhd3RlIENv -bnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2VydmljZXMgRGl2 -aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkqhkiG9w0BCQEW -F3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB -iQKBgQDTpFBuyP9Wa+bPXbbqDGh1R6KqwtqEJfyo9EdR2oW1IHSUhh4PdcnpCGH1 -Bm0wbhUZAulSwGLbTZme4moMRDjN/r7jZAlwxf6xaym2L0nIO9QnBCUQly/nkG3A -KEKZ10xD3sP1IW1Un13DWOHA5NlbsLjctHvfNjrCtWYiEtaHDQIDAQABMA0GCSqG -SIb3DQEBBAUAA4GBAIsvn7ifX3RUIrvYXtpI4DOfARkTogwm6o7OwVdl93yFhDcX -7h5t0XZ11MUAMziKdde3rmTvzUYIUCYoY5b032IwGMTvdiclK+STN6NP2m5nvFAM -qJT5gC5O+j/jBuZRQ4i0AMYQr5F4lT8oBJnhgafw6PL8aDY2vMHGSPl9+7uf ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIIDDTCCAnYCAQAwDQYJKoZIhvcNAQEEBQAwgc4xCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNlcnZlciBD -QTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNvbTAeFw05 -NjA3MjcxODA3MTRaFw05ODA3MjcxODA3MTRaMIHOMQswCQYDVQQGEwJaQTEVMBMG -A1UECBMMV2VzdGVybiBDYXBlMRIwEAYDVQQHEwlDYXBlIFRvd24xHTAbBgNVBAoT -FFRoYXd0ZSBDb25zdWx0aW5nIGNjMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9uIFNl -cnZpY2VzIERpdmlzaW9uMSEwHwYDVQQDExhUaGF3dGUgUHJlbWl1bSBTZXJ2ZXIg -Q0ExKDAmBgkqhkiG9w0BCQEWGXByZW1pdW0tc2VydmVyQHRoYXd0ZS5jb20wgZ8w -DQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANI2NmqL18JbntqBQWKPOO5JBFXW0O8c -G5UWR+8YSDU6UvQragaPOy/qVuOvho2eF/eetGV1Ak3vywmiIVHYm9Bn0LoNkgYU -c9STy5cqAJxcTgy8+hVS/PJEbtoRSm4Iny8t4/mqOoZztkZTWMiJBb2DEbhzP6oH -jfRCTedAnRw3AgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAutFIgTRZVYerIZfL9lvR -w9Eifvvo5KTZ3h+Bj+VzNnyw4Qc/IyXkPOu6SIiH9LQ3sCmWBdxpe+qr4l77rLj2 -GYuMtESFfn1XVALzkYgC7JcPuTOjMfIiMByt+uFf8AV8x0IW/Qkuv+hEQcyM9vxK -3VZdLbCVIhNoEsysrxCpxcI= ------END CERTIFICATE----- -Tims test GCI CA - ------BEGIN CERTIFICATE----- -MIIB8DCCAZoCAQAwDQYJKoZIhvcNAQEEBQAwgYIxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2RldmVsb3BtZW50MRkwFwYDVQQDExBD -cnlwdFNvZnQgRGV2IENBMB4XDTk3MDMyMjEzMzQwNFoXDTk4MDMyMjEzMzQwNFow -gYIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhC -cmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2Rl -dmVsb3BtZW50MRkwFwYDVQQDExBDcnlwdFNvZnQgRGV2IENBMFwwDQYJKoZIhvcN -AQEBBQADSwAwSAJBAOAOAqogG5QwAmLhzyO4CoRnx/wVy4NZP4dxJy83O1EnL0rw -OdsamJKvPOLHgSXo3gDu9uVyvCf/QJmZAmC5ml8CAwEAATANBgkqhkiG9w0BAQQF -AANBADRRS/GVdd7rAqRW6SdmgLJduOU2yq3avBu99kRqbp9A/dLu6r6jU+eP4oOA -TfdbFZtAAD2Hx9jUtY3tfdrJOb8= ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIICVjCCAgACAQAwDQYJKoZIhvcNAQEEBQAwgbUxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsTI1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9O -IEFVVEhPUklUSUVTMTQwMgYDVQQDEytaRVJPIFZBTFVFIENBIC0gREVNT05TVFJB -VElPTiBQVVJQT1NFUyBPTkxZMB4XDTk3MDQwMzEzMjI1NFoXDTk4MDQwMzEzMjI1 -NFowgbUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsT -I1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9OIEFVVEhPUklUSUVTMTQwMgYDVQQDEyta -RVJPIFZBTFVFIENBIC0gREVNT05TVFJBVElPTiBQVVJQT1NFUyBPTkxZMFwwDQYJ -KoZIhvcNAQEBBQADSwAwSAJBAOZ7T7yqP/tyspcko3yPY1y0Cm2EmwNvzW4QgVXR -Fjs3HmJ4xtSpXdo6mwcGezL3Abt/aQXaxv9PU8xt+Jr0OFUCAwEAATANBgkqhkiG -9w0BAQQFAANBAOQpYmGgyCqCy1OljgJhCqQOu627oVlHzK1L+t9vBaMfn40AVUR4 -WzQVWO31KTgi5vTK1U+3h46fgUWqQ0h+6rU= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIAwgKADAgECAgEAMA0GCSqGSIb3DQEBBAUAMGIxETAPBgNVBAcTCEludGVybmV0 -MRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xh -c3MgMSBDQSAtIEluZGl2aWR1YWwgU3Vic2NyaWJlcjAeFw05NjA0MDgxMDIwMjda -Fw05NzA0MDgxMDIwMjdaMGIxETAPBgNVBAcTCEludGVybmV0MRcwFQYDVQQKEw5W -ZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xhc3MgMSBDQSAtIElu -ZGl2aWR1YWwgU3Vic2NyaWJlcjCAMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC2 -FKbPTdAFDdjKI9BvqrQpkmOOLPhvltcunXZLEbE2jVfJw/0cxrr+Hgi6M8qV6r7j -W80GqLd5HUQq7XPysVKDaBBwZJHXPmv5912dFEObbpdFmIFH0S3L3bty10w/cari -QPJUObwW7s987LrbP2wqsxaxhhKdrpM01bjV0Pc+qQIDAQABAAAAADANBgkqhkiG -9w0BAQQFAAOBgQA+1nJryNt8VBRjRr07ArDAV/3jAH7GjDc9jsrxZS68ost9v06C -TvTNKGL+LISNmFLXl+JXhgGB0JZ9fvyYzNgHQ46HBUng1H6voalfJgS2KdEo50wW -8EFZYMDkT1k4uynwJqkVN2QJK/2q4/A/VCov5h6SlM8Affg2W+1TLqvqkwAA ------END CERTIFICATE----- - - subject=/L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - issuer= /L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - ------BEGIN CERTIFICATE----- -MIIEkzCCA/ygAwIBAgIRANDTUpSRL3nTFeMrMayFSPAwDQYJKoZIhvcNAQECBQAw -YjERMA8GA1UEBxMISW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQw -MgYDVQQLEytWZXJpU2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3Jp -YmVyMB4XDTk2MDYwNDAwMDAwMFoXDTk4MDYwNDIzNTk1OVowYjERMA8GA1UEBxMI -SW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJp -U2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3JpYmVyMIGfMA0GCSqG -SIb3DQEBAQUAA4GNADCBiQKBgQC6A+2czKGRcYMfm8gdnk+0de99TDDzsqo0v5nb -RsbUmMcdRQ7nsMbRWe0SAb/9QoLTZ/cJ0iOBqdrkz7UpqqKarVoTSdlSMVM92tWp -3bJncZHQD1t4xd6lQVdI1/T6R+5J0T1ukOdsI9Jmf+F28S6g3R3L1SFwiHKeZKZv -z+793wIDAQABo4ICRzCCAkMwggIpBgNVHQMBAf8EggIdMIICGTCCAhUwggIRBgtg -hkgBhvhFAQcBATCCAgAWggGrVGhpcyBjZXJ0aWZpY2F0ZSBpbmNvcnBvcmF0ZXMg -YnkgcmVmZXJlbmNlLCBhbmQgaXRzIHVzZSBpcyBzdHJpY3RseSBzdWJqZWN0IHRv -LCB0aGUgVmVyaVNpZ24gQ2VydGlmaWNhdGlvbiBQcmFjdGljZSBTdGF0ZW1lbnQg -KENQUyksIGF2YWlsYWJsZSBhdDogaHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL0NQ -Uy0xLjA7IGJ5IEUtbWFpbCBhdCBDUFMtcmVxdWVzdHNAdmVyaXNpZ24uY29tOyBv -ciBieSBtYWlsIGF0IFZlcmlTaWduLCBJbmMuLCAyNTkzIENvYXN0IEF2ZS4sIE1v -dW50YWluIFZpZXcsIENBIDk0MDQzIFVTQSBUZWwuICsxICg0MTUpIDk2MS04ODMw -IENvcHlyaWdodCAoYykgMTk5NiBWZXJpU2lnbiwgSW5jLiAgQWxsIFJpZ2h0cyBS -ZXNlcnZlZC4gQ0VSVEFJTiBXQVJSQU5USUVTIERJU0NMQUlNRUQgYW5kIExJQUJJ -TElUWSBMSU1JVEVELqAOBgxghkgBhvhFAQcBAQGhDgYMYIZIAYb4RQEHAQECMC8w -LRYraHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL3JlcG9zaXRvcnkvQ1BTLTEuMDAU -BglghkgBhvhCAQEBAf8EBAMCAgQwDQYJKoZIhvcNAQECBQADgYEApRJRkNBqLLgs -53IR/d18ODdLOWMTZ+QOOxBrq460iBEdUwgF8vmPRX1ku7UiDeNzaLlurE6eFqHq -2zPyK5j60zfTLVJMWKcQWwTJLjHtXrW8pxhNtFc6Fdvy5ZkHnC/9NIl7/t4U6WqB -p4y+p7SdMIkEwIZfds0VbnQyX5MRUJY= ------END CERTIFICATE----- - - subject=/C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKhAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAyVxZ -nvIbigEUtBDfBEDb41evakVAj4QMC9Ez2dkRz+4CWB8l9yqoRAWq7AMfeH+ek7ma -AKojfdashaJjRcdyJ8z0TMZ1cdI5709C8HXfCpDGjiBvmA/4rCNfcCk2pMmG57Ga -IMtTpYXnPb59mv4kRTPcdhXtD6JxZExlLoFoRacCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQB1Zmw+0c2B27X4LzZRtvdCvM1Cr9wO+hVs+GeTVzrrtpLotgHKjLeOQ7RJ -Zfk+7r11Ri7J/CVdqMcvi5uPaM+0nJcYwE3vH9mvgrPmZLiEXIqaB1JDYft0nls6 -NvxMsvwaPxUupVs8G5DsiCnkWRb5zget7Ond2tIxik/W2O8XjQ== ------END CERTIFICATE----- - subject=/C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKmAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0LJ1 -9njQrlpQ9OlQqZ+M1++RlHDo0iSQdomF1t+s5gEXMoDwnZNHvJplnR+Xrr/phnVj -IIm9gFidBAydqMEk6QvlMXi9/C0MN2qeeIDpRnX57aP7E3vIwUzSo+/1PLBij0pd -O92VZ48TucE81qcmm+zDO3rZTbxtm+gVAePwR6kCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQBT3dPwnCR+QKri/AAa19oM/DJhuBUNlvP6Vxt/M3yv6ZiaYch6s7f/sdyZ -g9ysEvxwyR84Qu1E9oAuW2szaayc01znX1oYx7EteQSWQZGZQbE8DbqEOcY7l/Am -yY7uvcxClf8exwI/VAx49byqYHwCaejcrOICdmHEPgPq0ook0Q== ------END CERTIFICATE----- diff --git a/app/openssl/apps/server2.pem b/app/openssl/apps/server2.pem index 8bb66419..a3927cf7 100644 --- a/app/openssl/apps/server2.pem +++ b/app/openssl/apps/server2.pem @@ -1,376 +1,52 @@ -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Server test cert (1024 bit) ------BEGIN CERTIFICATE----- -MIICLjCCAZcCAQEwDQYJKoZIhvcNAQEEBQAwWzELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYD -VQQDExJUZXN0IENBICgxMDI0IGJpdCkwHhcNOTcwNjA5MTM1NzU0WhcNOTgwNjA5 -MTM1NzU0WjBkMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDEaMBgG -A1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxJDAiBgNVBAMTG1NlcnZlciB0ZXN0IGNl -cnQgKDEwMjQgYml0KTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAsxH1PBPm -RkxrR11eV4bzNi4N9n11CI8nV29+ARlT1+qDe/mjVUvXlmsr1v/vf71G9GgqopSa -6RXrICLVdk/FYYYzhPvl1M+OrjaXDFO8BzBAF1Lnz6c7aRZvGRJNrRSr2nZEkqDf -JW9dY7r2VZEpD5QeuaRYUnuECkqeieB65GMCAwEAATANBgkqhkiG9w0BAQQFAAOB -gQCWsOta6C0wiVzXz8wPmJKyTrurMlgUss2iSuW9366iwofZddsNg7FXniMzkIf6 -dp7jnmWZwKZ9cXsNUS2o4OL07qOk2HOywC0YsNZQsOBu1CBTYYkIefDiKFL1zQHh -8lwwNd4NP+OE3NzUNkCfh4DnFfg9WHkXUlD5UpxNRJ4gJA== ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIICXgIBAAKBgQCzEfU8E+ZGTGtHXV5XhvM2Lg32fXUIjydXb34BGVPX6oN7+aNV -S9eWayvW/+9/vUb0aCqilJrpFesgItV2T8VhhjOE++XUz46uNpcMU7wHMEAXUufP -pztpFm8ZEk2tFKvadkSSoN8lb11juvZVkSkPlB65pFhSe4QKSp6J4HrkYwIDAQAB -AoGBAKy8jvb0Lzby8q11yNLf7+78wCVdYi7ugMHcYA1JVFK8+zb1WfSm44FLQo/0 -dSChAjgz36TTexeLODPYxleJndjVcOMVzsLJjSM8dLpXsTS4FCeMbhw2s2u+xqKY -bbPWfk+HOTyJjfnkcC5Nbg44eOmruq0gSmBeUXVM5UntlTnxAkEA7TGCA3h7kx5E -Bl4zl2pc3gPAGt+dyfk5Po9mGJUUXhF5p2zueGmYWW74TmOWB1kzt4QRdYMzFePq -zfDNXEa1CwJBAMFErdY0xp0UJ13WwBbUTk8rujqQdHtjw0klhpbuKkjxu2hN0wwM -6p0D9qxF7JHaghqVRI0fAW/EE0OzdHMR9QkCQQDNR26dMFXKsoPu+vItljj/UEGf -QG7gERiQ4yxaFBPHgdpGo0kT31eh9x9hQGDkxTe0GNG/YSgCRvm8+C3TMcKXAkBD -dhGn36wkUFCddMSAM4NSJ1VN8/Z0y5HzCmI8dM3VwGtGMUQlxKxwOl30LEQzdS5M -0SWojNYXiT2gOBfBwtbhAkEAhafl5QEOIgUz+XazS/IlZ8goNKdDVfYgK3mHHjvv -nY5G+AuGebdNkXJr4KSWxDcN+C2i47zuj4QXA16MAOandA== ------END RSA PRIVATE KEY----- -subject=/C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -issuer= /C=US/O=AT&T Bell Laboratories/OU=Prototype Research CA -notBefore=950413210656Z -notAfter =970412210656Z ------BEGIN X509 CERTIFICATE----- - -MIICCDCCAXECAQAwDQYJKoZIhvcNAQEEBQAwTjELMAkGA1UEBhMCVVMxHzAdBgNV -BAoUFkFUJlQgQmVsbCBMYWJvcmF0b3JpZXMxHjAcBgNVBAsUFVByb3RvdHlwZSBS -ZXNlYXJjaCBDQTAeFw05NTA0MTMyMTA2NTZaFw05NzA0MTIyMTA2NTZaME4xCzAJ -BgNVBAYTAlVTMR8wHQYDVQQKFBZBVCZUIEJlbGwgTGFib3JhdG9yaWVzMR4wHAYD -VQQLFBVQcm90b3R5cGUgUmVzZWFyY2ggQ0EwgZwwDQYJKoZIhvcNAQEBBQADgYoA -MIGGAoGAebOmgtSCl+wCYZc86UGYeTLY8cjmW2P0FN8ToT/u2pECCoFdrlycX0OR -3wt0ZhpFXLVNeDnHwEE9veNUih7pCL2ZBFqoIoQkB1lZmXRiVtjGonz8BLm/qrFM -YHb0lme/Ol+s118mwKVxnn6bSAeI/OXKhLaVdYZWk+aEaxEDkVkCAQ8wDQYJKoZI -hvcNAQEEBQADgYEAAZMG14lZmZ8bahkaHaTV9dQf4p2FZiQTFwHP9ZyGsXPC+LT5 -dG5iTaRmyjNIJdPWohZDl97kAci79aBndvuEvRKOjLHs3WRGBIwERnAcnY9Mz8u/ -zIHK23PjYVxGGaZd669OJwD0CYyqH22HH9nFUGaoJdsv39ChW0NRdLE9+y8= ------END X509 CERTIFICATE----- -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test CA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJjCCAY8CAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTc0M1oXDTAxMDYw -OTEzNTc0M1owWzELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRswGQYDVQQDExJUZXN0IENBICgxMDI0 -IGJpdCkwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAKO7o8t116VP6cgybTsZ -DCZhr95nYlZuya3aCi1IKoztqwWnjbmDFIriOqGFPrZQ+moMETC9D59iRW/dFXSv -1F65ka/XY2hLh9exCCo7XuUcDs53Qp3bI3AmMqHjgzE8oO3ajyJAzJkTTOUecQU2 -mw/gI4tMM0LqWMQS7luTy4+xAgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAM7achv3v -hLQJcv/65eGEpBXM40ZDVoFQFFJWaY5p883HTqLB1x4FdzsXHH0QKBTcKpWwqyu4 -YDm3fb8oDugw72bCzfyZK/zVZPR/hVlqI/fvU109Qoc+7oPvIXWky71HfcK6ZBCA -q30KIqGM/uoM60INq97qjDmCJapagcNBGQs= ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIICXQIBAAKBgQCju6PLddelT+nIMm07GQwmYa/eZ2JWbsmt2gotSCqM7asFp425 -gxSK4jqhhT62UPpqDBEwvQ+fYkVv3RV0r9ReuZGv12NoS4fXsQgqO17lHA7Od0Kd -2yNwJjKh44MxPKDt2o8iQMyZE0zlHnEFNpsP4COLTDNC6ljEEu5bk8uPsQIDAQAB -AoGAVZmpFZsDZfr0l2S9tLLwpjRWNOlKATQkno6q2WesT0eGLQufTciY+c8ypfU6 -hyio8r5iUl/VhhdjhAtKx1mRpiotftHo/eYf8rtsrnprOnWG0bWjLjtIoMbcxGn2 -J3bN6LJmbJMjDs0eJ3KnTu646F3nDUw2oGAwmpzKXA1KAP0CQQDRvQhxk2D3Pehs -HvG665u2pB5ipYQngEFlZO7RHJZzJOZEWSLuuMqaF/7pTfA5jiBvWqCgJeCRRInL -21ru4dlPAkEAx9jj7BgKn5TYnMoBSSe0afjsV9oApVpN1Nacb1YDtCwy+scp3++s -nFxlv98wxIlSdpwMUn+AUWfjiWR7Tu/G/wJBAJ/KjwZIrFVxewP0x2ILYsTRYLzz -MS4PDsO7FB+I0i7DbBOifXS2oNSpd3I0CNMwrxFnUHzynpbOStVfN3ZL5w0CQQCa -pwFahxBRhkJKsxhjoFJBX9yl75JoY4Wvm5Tbo9ih6UJaRx3kqfkN14L2BKYcsZgb -KY9vmDOYy6iNfjDeWTfJAkBkfPUb8oTJ/nSP5zN6sqGxSY4krc4xLxpRmxoJ8HL2 -XfhqXkTzbU13RX9JJ/NZ8vQN9Vm2NhxRGJocQkmcdVtJ ------END RSA PRIVATE KEY----- ------BEGIN X509 CERTIFICATE----- -MIICYDCCAiACAgEoMAkGBSsOAwINBQAwfDELMAkGA1UEBhMCVVMxNjA0BgNVBAoT -LU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFuZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZ -MBcGA1UECxMQVGVzdCBFbnZpcm9ubWVudDEaMBgGA1UECxMRRFNTLU5BU0EtUGls -b3QtQ0EwHhcNOTYwMjI2MTYzMjQ1WhcNOTcwMjI1MTYzMjQ1WjB8MQswCQYDVQQG -EwJVUzE2MDQGA1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFk -bWluaXN0cmF0aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MRowGAYDVQQL -ExFEU1MtTkFTQS1QaWxvdC1DQTCB8jAJBgUrDgMCDAUAA4HkADCB4AJBAMA/ssKb -hPNUG7ZlASfVwEJU21O5OyF/iyBzgHI1O8eOhJGUYO8cc8wDMjR508Mr9cp6Uhl/ -ZB7FV5GkLNEnRHYCQQDUEaSg45P2qrDwixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLb -bn3QK74T2IxY1yY+kCNq8XrIqf5fJJzIH0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3 -fVd0geUCQQCzCFUQAh+ZkEmp5804cs6ZWBhrUAfnra8lJItYo9xPcXgdIfLfibcX -R71UsyO77MRD7B0+Ag2tq794IleCVcEEMAkGBSsOAwINBQADLwAwLAIUUayDfreR -Yh2WeU86/pHNdkUC1IgCFEfxe1f0oMpxJyrJ5XIxTi7vGdoK ------END X509 CERTIFICATE----- ------BEGIN X509 CERTIFICATE----- - -MIICGTCCAdgCAwCqTDAJBgUrDgMCDQUAMHwxCzAJBgNVBAYTAlVTMTYwNAYDVQQK -Ey1OYXRpb25hbCBBZXJvbmF1dGljcyBhbmQgU3BhY2UgQWRtaW5pc3RyYXRpb24x -GTAXBgNVBAsTEFRlc3QgRW52aXJvbm1lbnQxGjAYBgNVBAsTEURTUy1OQVNBLVBp -bG90LUNBMB4XDTk2MDUxNDE3MDE0MVoXDTk3MDUxNDE3MDE0MVowMzELMAkGA1UE -BhMCQVUxDzANBgNVBAoTBk1pbmNvbTETMBEGA1UEAxMKRXJpYyBZb3VuZzCB8jAJ -BgUrDgMCDAUAA4HkADCB4AJBAKbfHz6vE6pXXMTpswtGUec2tvnfLJUsoxE9qs4+ -ObZX7LmLvragNPUeiTJx7UOWZ5DfBj6bXLc8eYne0lP1g3ACQQDUEaSg45P2qrDw -ixTRhFhmWz5Nvc4lRFQ/42XPcchiJBLbbn3QK74T2IxY1yY+kCNq8XrIqf5fJJzI -H0J/xUP3AhUAsg2wsQHfDGYk/BOSulX3fVd0geUCQQCzCFUQAh+ZkEmp5804cs6Z -WBhrUAfnra8lJItYo9xPcXgdIfLfibcXR71UsyO77MRD7B0+Ag2tq794IleCVcEE -MAkGBSsOAwINBQADMAAwLQIUWsuuJRE3VT4ueWkWMAJMJaZjj1ECFQCYY0zX4bzM -LC7obsrHD8XAHG+ZRG== ------END X509 CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICTTCCAbagAwIBAgIBADANBgkqhkiG9w0BAQQFADBMMQswCQYDVQQGEwJHQjEM -MAoGA1UEChMDVUNMMRgwFgYDVQQLEw9JQ0UtVEVMIFByb2plY3QxFTATBgNVBAMT -DFRydXN0RmFjdG9yeTAeFw05NzA0MjIxNDM5MTRaFw05ODA0MjIxNDM5MTRaMEwx -CzAJBgNVBAYTAkdCMQwwCgYDVQQKEwNVQ0wxGDAWBgNVBAsTD0lDRS1URUwgUHJv -amVjdDEVMBMGA1UEAxMMVHJ1c3RGYWN0b3J5MIGcMAoGBFUIAQECAgQAA4GNADCB -iQKBgQCEieR8NcXkUW1f0G6aC6u0i8q/98JqS6RxK5YmHIGKCkuTWAUjzLfUa4dt -U9igGCjTuxaDqlzEim+t/02pmiBZT9HaX++35MjQPUWmsChcYU5WyzGErXi+rQaw -zlwS73zM8qiPj/97lXYycWhgL0VaiDSPxRXEUdWoaGruom4mNQIDAQABo0IwQDAd -BgNVHQ4EFgQUHal1LZr7oVg5z6lYzrhTgZRCmcUwDgYDVR0PAQH/BAQDAgH2MA8G -A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAfaggfl6FZoioecjv0dq8 -/DXo/u11iMZvXn08gjX/zl2b4wtPbShOSY5FhkSm8GeySasz+/Nwb/uzfnIhokWi -lfPZHtlCWtXbIy/TN51eJyq04ceDCQDWvLC2enVg9KB+GJ34b5c5VaPRzq8MBxsA -S7ELuYGtmYgYm9NZOIr7yU0= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIIB6jCCAZQCAgEtMA0GCSqGSIb3DQEBBAUAMIGAMQswCQYDVQQGEwJVUzE2MDQG -A1UEChMtTmF0aW9uYWwgQWVyb25hdXRpY3MgYW5kIFNwYWNlIEFkbWluaXN0cmF0 -aW9uMRkwFwYDVQQLExBUZXN0IEVudmlyb25tZW50MR4wHAYDVQQLExVNRDUtUlNB -LU5BU0EtUGlsb3QtQ0EwHhcNOTYwNDMwMjIwNTAwWhcNOTcwNDMwMjIwNTAwWjCB -gDELMAkGA1UEBhMCVVMxNjA0BgNVBAoTLU5hdGlvbmFsIEFlcm9uYXV0aWNzIGFu -ZCBTcGFjZSBBZG1pbmlzdHJhdGlvbjEZMBcGA1UECxMQVGVzdCBFbnZpcm9ubWVu -dDEeMBwGA1UECxMVTUQ1LVJTQS1OQVNBLVBpbG90LUNBMFkwCgYEVQgBAQICAgAD -SwAwSAJBALmmX5+GqAvcrWK13rfDrNX9UfeA7f+ijyBgeFQjYUoDpFqapw4nzQBL -bAXug8pKkRwa2Zh8YODhXsRWu2F/UckCAwEAATANBgkqhkiG9w0BAQQFAANBAH9a -OBA+QCsjxXgnSqHx04gcU8S49DVUb1f2XVoLnHlIb8RnX0k5O6mpHT5eti9bLkiW -GJNMJ4L0AJ/ac+SmHZc= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIICajCCAdMCBDGA0QUwDQYJKoZIhvcNAQEEBQAwfTELMAkGA1UEBhMCQ2ExDzAN -BgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmlsaXR5IEFjY2VwdGVkMR8w -HQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRwwGgYDVQQDExNFbnRydXN0 -IERlbW8gV2ViIENBMB4XDTk2MDQyNjEzMzUwMVoXDTA2MDQyNjEzMzUwMVowfTEL -MAkGA1UEBhMCQ2ExDzANBgNVBAcTBk5lcGVhbjEeMBwGA1UECxMVTm8gTGlhYmls -aXR5IEFjY2VwdGVkMR8wHQYDVQQKExZGb3IgRGVtbyBQdXJwb3NlcyBPbmx5MRww -GgYDVQQDExNFbnRydXN0IERlbW8gV2ViIENBMIGdMA0GCSqGSIb3DQEBAQUAA4GL -ADCBhwKBgQCaroS7O1DA0hm4IefNYU1cx/nqOmzEnk291d1XqznDeF4wEgakbkCc -zTKxK791yNpXG5RmngqH7cygDRTHZJ6mfCRn0wGC+AI00F2vYTGqPGRQL1N3lZT0 -YDKFC0SQeMMjFIZ1aeQigroFQnHo0VB3zWIMpNkka8PY9lxHZAmWwQIBAzANBgkq -hkiG9w0BAQQFAAOBgQBAx0UMVA1s54lMQyXjMX5kj99FJN5itb8bK1Rk+cegPQPF -cWO9SEWyEjjBjIkjjzAwBkaEszFsNGxemxtXvwjIm1xEUMTVlPEWTs2qnDvAUA9W -YqhWbhH0toGT36236QAsqCZ76rbTRVSSX2BHyJwJMG2tCRv7kRJ//NIgxj3H4w== ------END CERTIFICATE----- - -issuer= /C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) -subject=/C=AU/ST=Queensland/O=CryptSoft Pty Ltd/CN=Test PCA (1024 bit) ------BEGIN CERTIFICATE----- -MIICJzCCAZACAQAwDQYJKoZIhvcNAQEEBQAwXDELMAkGA1UEBhMCQVUxEzARBgNV -BAgTClF1ZWVuc2xhbmQxGjAYBgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYD -VQQDExNUZXN0IFBDQSAoMTAyNCBiaXQpMB4XDTk3MDYwOTEzNTczN1oXDTAxMDYw -OTEzNTczN1owXDELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxGjAY -BgNVBAoTEUNyeXB0U29mdCBQdHkgTHRkMRwwGgYDVQQDExNUZXN0IFBDQSAoMTAy -NCBiaXQpMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCdoWk/3+WcMlfjIrkg -40ketmnQaEogQe1LLcuOJV6rKfUSAsPgwgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp -22Jp85PmemiDzyUIStwk72qhp1imbANZvlmlCFKiQrjUyuDfu4TABmn+kkt3vR1Y -BEOGt+IFye1UBVSATVdRJ2UVhwIDAQABMA0GCSqGSIb3DQEBBAUAA4GBABNA1u/S -Cg/LJZWb7GliiKJsvuhxlE4E5JxQF2zMub/CSNbF97//tYSyj96sxeFQxZXbcjm9 -xt6mr/xNLA4szNQMJ4P+L7b5e/jC5DSqlwS+CUYJgaFs/SP+qJoCSu1bR3IM9XWO -cRBpDmcBbYLkSyB92WURvsZ1LtjEcn+cdQVI +subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert #2 +issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA +-----BEGIN CERTIFICATE----- +MIID6jCCAtKgAwIBAgIJALnu1NlVpZ60MA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT +VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt +ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZzELMAkG +A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU +RVNUSU5HIFBVUlBPU0VTIE9OTFkxHDAaBgNVBAMME1Rlc3QgU2VydmVyIENlcnQg +IzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDrdi7j9yctG+L4EjBy +gjPmEqZzOJEQba26MoQGzglU7e5Xf59Rb/hgVQuKAoiZe7/R8rK4zJ4W7iXdXw0L +qBpyG8B5aGKeI32w+A9TcBApoXXL2CrYQEQjZwUIpLlYBIi2NkJj3nVkq5dgl1gO +ALiQ+W8jg3kzg5Ec9rimp9r93N8wsSL3awsafurmYCvOf7leHaMP1WJ/zDRGUNHG +/WtDjXc8ZUG1+6EXU9Jc2Fs+2Omf7fcN0l00AK/wPg8OaNS0rKyGq9JdIT9FRGV1 +bXe/rx58FaE5CItdwCSYhJvF/O95LWQoxJXye5bCFLmvDTEyVq9FMSCptfsmbXjE +ZGsXAgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJ +YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud +DgQWBBR52UaWWTKzZGDH/X4mWNcuqeQVazAfBgNVHSMEGDAWgBQ2w2yI55X+sL3s +zj49hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEANBW+XYLlHBqVY/31ie+3gRlS +LPfy4SIqn0t3RJjagT29MXprblBO2cbMO8VGjkQdKGpmMXjxbht2arOOUXRHX4n/ +XTyn/QHEf0bcwIITMReO3DZUPAEw8hSjn9xEOM0IRVOCP+mH5fi74QzzQaZVCyYg +5VtLKdww/+sc0nCbKl2KWgDluriH0nfVx95qgW3mg9dhXRr0zmf1w2zkBHYpARYL +Dew6Z8EE4tS3HJu8/qM6meWzNtrfonQ3eiiMxjZBxzV46jchBwa2z9XYhP6AmpPb +oeTSzcQNbWsxaGYzWo46oLDUZmJOwSBawbS31bZNMCoPIY6ukoesCzFSsUKZww== -----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQCdoWk/3+WcMlfjIrkg40ketmnQaEogQe1LLcuOJV6rKfUSAsPg -wgsabJ/wn8TxA1yy3eKJbFl3OiUXMRsp22Jp85PmemiDzyUIStwk72qhp1imbANZ -vlmlCFKiQrjUyuDfu4TABmn+kkt3vR1YBEOGt+IFye1UBVSATVdRJ2UVhwIDAQAB -AoGAba4fTtuap5l7/8ZsbE7Z1O32KJY4ZcOZukLOLUUhXxXduT+FTgGWujc0/rgc -z9qYCLlNZHOouMYTgtSfYvuMuLZ11VIt0GYH+nRioLShE59Yy+zCRyC+gPigS1kz -xvo14AsOIPYV14Tk/SsHyq6E0eTk7VzaIE197giiINUERPECQQDSKmtPTh/lRKw7 -HSZSM0I1mFWn/1zqrAbontRQY5w98QWIOe5qmzYyFbPXYT3d9BzlsMyhgiRNoBbD -yvohSHXJAkEAwAHx6ezAZeWWzD5yXD36nyjpkVCw7Tk7TSmOceLJMWt1QcrCfqlS -xA5jjpQ6Z8suU5DdtWAryM2sAir1WisYzwJAd6Zcx56jvAQ3xcPXsE6scBTVFzrj -7FqZ6E+cclPzfLQ+QQsyOBE7bpI6e/FJppY26XGZXo3YGzV8IGXrt40oOQJALETG -h86EFXo3qGOFbmsDy4pdP5nBERCu8X1xUCSfintiD4c2DInxgS5oGclnJeMcjTvL -QjQoJCX3UJCi/OUO1QJBAKgcDHWjMvt+l1pjJBsSEZ0HX9AAIIVx0RQmbFGS+F2Q -hhu5l77WnnZOQ9vvhV5u7NPCUF9nhU3jh60qWWO8mkc= +MIIEowIBAAKCAQEA63Yu4/cnLRvi+BIwcoIz5hKmcziREG2tujKEBs4JVO3uV3+f +UW/4YFULigKImXu/0fKyuMyeFu4l3V8NC6gachvAeWhiniN9sPgPU3AQKaF1y9gq +2EBEI2cFCKS5WASItjZCY951ZKuXYJdYDgC4kPlvI4N5M4ORHPa4pqfa/dzfMLEi +92sLGn7q5mArzn+5Xh2jD9Vif8w0RlDRxv1rQ413PGVBtfuhF1PSXNhbPtjpn+33 +DdJdNACv8D4PDmjUtKyshqvSXSE/RURldW13v68efBWhOQiLXcAkmISbxfzveS1k +KMSV8nuWwhS5rw0xMlavRTEgqbX7Jm14xGRrFwIDAQABAoIBAHLsTPihIfLnYIE5 +x4GsQQ5zXeBw5ITDM37ktwHnQDC+rIzyUl1aLD1AZRBoKinXd4lOTqLZ4/NHKx4A +DYr58mZtWyUmqLOMmQVuHXTZBlp7XtYuXMMNovQwjQlp9LicBeoBU6gQ5PVMtubD +F4xGF89Sn0cTHW3iMkqTtQ5KcR1j57OcJO0FEb1vPvk2MXI5ZyAatUYE7YacbEzd +rg02uIwx3FqNSkuSI79uz4hMdV5TPtuhxx9nTwj9aLUhXFeZ0mn2PVgVzEnnMoJb ++znlsZDgzDlJqdaD744YGWh8Z3OEssB35KfzFcdOeO6yH8lmv2Zfznk7pNPT7LTb +Lae9VgkCgYEA92p1qnAB3NtJtNcaW53i0S5WJgS1hxWKvUDx3lTB9s8X9fHpqL1a +E94fDfWzp/hax6FefUKIvBOukPLQ6bYjTMiFoOHzVirghAIuIUoMI5VtLhwD1hKs +Lr7l/dptMgKb1nZHyXoKHRBthsy3K4+udsPi8TzMvYElgEqyQIe/Rk0CgYEA86GL +8HC6zLszzKERDPBxrboRmoFvVUCTQDhsfj1M8aR3nQ8V5LkdIJc7Wqm/Ggfk9QRf +rJ8M2WUMlU5CNnCn/KCrKzCNZIReze3fV+HnKdbcXGLvgbHPrhnz8yYehUFG+RGq +bVyDWRU94T38izy2s5qMYrMJWZEYyXncSPbfcPMCgYAtaXfxcZ+V5xYPQFARMtiX +5nZfggvDoJuXgx0h3tK/N2HBfcaSdzbaYLG4gTmZggc/jwnl2dl5E++9oSPhUdIG +3ONSFUbxsOsGr9PBvnKd8WZZyUCXAVRjPBzAzF+whzQNWCZy/5htnz9LN7YDI9s0 +5113Q96cheDZPFydZY0hHQKBgQDVbEhNukM5xCiNcu+f2SaMnLp9EjQ4h5g3IvaP +5B16daw/Dw8LzcohWboqIxeAsze0GD/D1ZUJAEd0qBjC3g+a9BjefervCjKOzXng +38mEUm+6EwVjJSQcjSmycEs+Sr/kwr/8i5WYvU32+jk4tFgMoC+o6tQe/Uesf68k +z/dPVwKBgGbF7Vv1/3SmhlOy+zYyvJ0CrWtKxH9QP6tLIEgEpd8x7YTSuCH94yok +kToMXYA3sWNPt22GbRDZ+rcp4c7HkDx6I6vpdP9aQEwJTp0EPy0sgWr2XwYmreIQ +NFmkk8Itn9EY2R9VBaP7GLv5kvwxDdLAnmwGmzVtbmaVdxCaBwUk -----END RSA PRIVATE KEY----- -subject=/C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Commercial Certification Authority -notBefore=941104185834Z -notAfter =991103185834Z ------BEGIN X509 CERTIFICATE----- - -MIICIzCCAZACBQJBAAAWMA0GCSqGSIb3DQEBAgUAMFwxCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVy -Y2lhbCBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDQxODU4MzRaFw05 -OTExMDMxODU4MzRaMFwxCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0YSBT -ZWN1cml0eSwgSW5jLjErMCkGA1UECxMiQ29tbWVyY2lhbCBDZXJ0aWZpY2F0aW9u -IEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCk+4Fie84QJ93o -975sbsZwmdu41QUDaSiCnHJ/lj+O7Kwpkj+KFPhCdr69XQO5kNTQvAayUTNfxMK/ -touPmbZiImDd298ggrTKoi8tUO2UMt7gVY3UaOLgTNLNBRYulWZcYVI4HlGogqHE -7yXpCuaLK44xZtn42f29O2nZ6wIDAQABMA0GCSqGSIb3DQEBAgUAA34AdrW2EP4j -9/dZYkuwX5zBaLxJu7NJbyFHXSudVMQAKD+YufKKg5tgf+tQx6sFEC097TgCwaVI -0v5loMC86qYjFmZsGySp8+x5NRhPJsjjr1BKx6cxa9B8GJ1Qv6km+iYrRpwUqbtb -MJhCKLVLU7tDCZJAuqiqWqTGtotXTcU= ------END X509 CERTIFICATE----- -subject=/C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -issuer= /C=US/O=RSA Data Security, Inc./OU=Secure Server Certification Authority -notBefore=941109235417Z -notAfter =991231235417Z ------BEGIN X509 CERTIFICATE----- - -MIICKTCCAZYCBQJBAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMSAw -HgYDVQQKExdSU0EgRGF0YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJl -IFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NDExMDkyMzU0MTda -Fw05OTEyMzEyMzU0MTdaMF8xCzAJBgNVBAYTAlVTMSAwHgYDVQQKExdSU0EgRGF0 -YSBTZWN1cml0eSwgSW5jLjEuMCwGA1UECxMlU2VjdXJlIFNlcnZlciBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBmzANBgkqhkiG9w0BAQEFAAOBiQAwgYUCfgCSznrB -roM+WqqJg1esJQF2DK2ujiw3zus1eGRUA+WEQFHJv48I4oqCCNIWhjdV6bEhAq12 -aIGaBaJLyUslZiJWbIgHj/eBWW2EB2VwE3F2Ppt3TONQiVaYSLkdpykaEy5KEVmc -HhXVSVQsczppgrGXOZxtcGdI5d0t1sgeewIDAQABMA0GCSqGSIb3DQEBAgUAA34A -iNHReSHO4ovo+MF9NFM/YYPZtgs4F7boviGNjwC4i1N+RGceIr2XJ+CchcxK9oU7 -suK+ktPlDemvXA4MRpX/oRxePug2WHpzpgr4IhFrwwk4fia7c+8AvQKk8xQNMD9h -cHsg/jKjn7P0Z1LctO6EjJY2IN6BCINxIYoPnqk= ------END X509 CERTIFICATE----- -subject=/C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com -issuer= /C=ZA/SP=Western Cape/L=Cape Town/O=Thawte Consulting cc - /OU=Certification Services Division/CN=Thawte Server CA - /Email=server-certs@thawte.com ------BEGIN CERTIFICATE----- -MIIC+TCCAmICAQAwDQYJKoZIhvcNAQEEBQAwgcQxCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkq -hkiG9w0BCQEWF3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMB4XDTk2MDcyNzE4MDc1 -N1oXDTk4MDcyNzE4MDc1N1owgcQxCzAJBgNVBAYTAlpBMRUwEwYDVQQIEwxXZXN0 -ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMUVGhhd3RlIENv -bnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2VydmljZXMgRGl2 -aXNpb24xGTAXBgNVBAMTEFRoYXd0ZSBTZXJ2ZXIgQ0ExJjAkBgkqhkiG9w0BCQEW -F3NlcnZlci1jZXJ0c0B0aGF3dGUuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB -iQKBgQDTpFBuyP9Wa+bPXbbqDGh1R6KqwtqEJfyo9EdR2oW1IHSUhh4PdcnpCGH1 -Bm0wbhUZAulSwGLbTZme4moMRDjN/r7jZAlwxf6xaym2L0nIO9QnBCUQly/nkG3A -KEKZ10xD3sP1IW1Un13DWOHA5NlbsLjctHvfNjrCtWYiEtaHDQIDAQABMA0GCSqG -SIb3DQEBBAUAA4GBAIsvn7ifX3RUIrvYXtpI4DOfARkTogwm6o7OwVdl93yFhDcX -7h5t0XZ11MUAMziKdde3rmTvzUYIUCYoY5b032IwGMTvdiclK+STN6NP2m5nvFAM -qJT5gC5O+j/jBuZRQ4i0AMYQr5F4lT8oBJnhgafw6PL8aDY2vMHGSPl9+7uf ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIIDDTCCAnYCAQAwDQYJKoZIhvcNAQEEBQAwgc4xCzAJBgNVBAYTAlpBMRUwEwYD -VQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsGA1UEChMU -VGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy -dmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNlcnZlciBD -QTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNvbTAeFw05 -NjA3MjcxODA3MTRaFw05ODA3MjcxODA3MTRaMIHOMQswCQYDVQQGEwJaQTEVMBMG -A1UECBMMV2VzdGVybiBDYXBlMRIwEAYDVQQHEwlDYXBlIFRvd24xHTAbBgNVBAoT -FFRoYXd0ZSBDb25zdWx0aW5nIGNjMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9uIFNl -cnZpY2VzIERpdmlzaW9uMSEwHwYDVQQDExhUaGF3dGUgUHJlbWl1bSBTZXJ2ZXIg -Q0ExKDAmBgkqhkiG9w0BCQEWGXByZW1pdW0tc2VydmVyQHRoYXd0ZS5jb20wgZ8w -DQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANI2NmqL18JbntqBQWKPOO5JBFXW0O8c -G5UWR+8YSDU6UvQragaPOy/qVuOvho2eF/eetGV1Ak3vywmiIVHYm9Bn0LoNkgYU -c9STy5cqAJxcTgy8+hVS/PJEbtoRSm4Iny8t4/mqOoZztkZTWMiJBb2DEbhzP6oH -jfRCTedAnRw3AgMBAAEwDQYJKoZIhvcNAQEEBQADgYEAutFIgTRZVYerIZfL9lvR -w9Eifvvo5KTZ3h+Bj+VzNnyw4Qc/IyXkPOu6SIiH9LQ3sCmWBdxpe+qr4l77rLj2 -GYuMtESFfn1XVALzkYgC7JcPuTOjMfIiMByt+uFf8AV8x0IW/Qkuv+hEQcyM9vxK -3VZdLbCVIhNoEsysrxCpxcI= ------END CERTIFICATE----- -Tims test GCI CA - ------BEGIN CERTIFICATE----- -MIIB8DCCAZoCAQAwDQYJKoZIhvcNAQEEBQAwgYIxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2RldmVsb3BtZW50MRkwFwYDVQQDExBD -cnlwdFNvZnQgRGV2IENBMB4XDTk3MDMyMjEzMzQwNFoXDTk4MDMyMjEzMzQwNFow -gYIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhC -cmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxFDASBgNVBAsTC2Rl -dmVsb3BtZW50MRkwFwYDVQQDExBDcnlwdFNvZnQgRGV2IENBMFwwDQYJKoZIhvcN -AQEBBQADSwAwSAJBAOAOAqogG5QwAmLhzyO4CoRnx/wVy4NZP4dxJy83O1EnL0rw -OdsamJKvPOLHgSXo3gDu9uVyvCf/QJmZAmC5ml8CAwEAATANBgkqhkiG9w0BAQQF -AANBADRRS/GVdd7rAqRW6SdmgLJduOU2yq3avBu99kRqbp9A/dLu6r6jU+eP4oOA -TfdbFZtAAD2Hx9jUtY3tfdrJOb8= ------END CERTIFICATE----- - ------BEGIN CERTIFICATE----- -MIICVjCCAgACAQAwDQYJKoZIhvcNAQEEBQAwgbUxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5 -cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsTI1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9O -IEFVVEhPUklUSUVTMTQwMgYDVQQDEytaRVJPIFZBTFVFIENBIC0gREVNT05TVFJB -VElPTiBQVVJQT1NFUyBPTkxZMB4XDTk3MDQwMzEzMjI1NFoXDTk4MDQwMzEzMjI1 -NFowgbUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRTb2Z0IFB0eSBMdGQxLDAqBgNVBAsT -I1dPUlRITEVTUyBDRVJUSUZJQ0FUSU9OIEFVVEhPUklUSUVTMTQwMgYDVQQDEyta -RVJPIFZBTFVFIENBIC0gREVNT05TVFJBVElPTiBQVVJQT1NFUyBPTkxZMFwwDQYJ -KoZIhvcNAQEBBQADSwAwSAJBAOZ7T7yqP/tyspcko3yPY1y0Cm2EmwNvzW4QgVXR -Fjs3HmJ4xtSpXdo6mwcGezL3Abt/aQXaxv9PU8xt+Jr0OFUCAwEAATANBgkqhkiG -9w0BAQQFAANBAOQpYmGgyCqCy1OljgJhCqQOu627oVlHzK1L+t9vBaMfn40AVUR4 -WzQVWO31KTgi5vTK1U+3h46fgUWqQ0h+6rU= ------END CERTIFICATE----- ------BEGIN CERTIFICATE----- -MIAwgKADAgECAgEAMA0GCSqGSIb3DQEBBAUAMGIxETAPBgNVBAcTCEludGVybmV0 -MRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xh -c3MgMSBDQSAtIEluZGl2aWR1YWwgU3Vic2NyaWJlcjAeFw05NjA0MDgxMDIwMjda -Fw05NzA0MDgxMDIwMjdaMGIxETAPBgNVBAcTCEludGVybmV0MRcwFQYDVQQKEw5W -ZXJpU2lnbiwgSW5jLjE0MDIGA1UECxMrVmVyaVNpZ24gQ2xhc3MgMSBDQSAtIElu -ZGl2aWR1YWwgU3Vic2NyaWJlcjCAMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC2 -FKbPTdAFDdjKI9BvqrQpkmOOLPhvltcunXZLEbE2jVfJw/0cxrr+Hgi6M8qV6r7j -W80GqLd5HUQq7XPysVKDaBBwZJHXPmv5912dFEObbpdFmIFH0S3L3bty10w/cari -QPJUObwW7s987LrbP2wqsxaxhhKdrpM01bjV0Pc+qQIDAQABAAAAADANBgkqhkiG -9w0BAQQFAAOBgQA+1nJryNt8VBRjRr07ArDAV/3jAH7GjDc9jsrxZS68ost9v06C -TvTNKGL+LISNmFLXl+JXhgGB0JZ9fvyYzNgHQ46HBUng1H6voalfJgS2KdEo50wW -8EFZYMDkT1k4uynwJqkVN2QJK/2q4/A/VCov5h6SlM8Affg2W+1TLqvqkwAA ------END CERTIFICATE----- - - subject=/L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - issuer= /L=Internet/O=VeriSign, Inc./OU=VeriSign Class 2 CA - Individual Subscriber - ------BEGIN CERTIFICATE----- -MIIEkzCCA/ygAwIBAgIRANDTUpSRL3nTFeMrMayFSPAwDQYJKoZIhvcNAQECBQAw -YjERMA8GA1UEBxMISW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQw -MgYDVQQLEytWZXJpU2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3Jp -YmVyMB4XDTk2MDYwNDAwMDAwMFoXDTk4MDYwNDIzNTk1OVowYjERMA8GA1UEBxMI -SW50ZXJuZXQxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJp -U2lnbiBDbGFzcyAyIENBIC0gSW5kaXZpZHVhbCBTdWJzY3JpYmVyMIGfMA0GCSqG -SIb3DQEBAQUAA4GNADCBiQKBgQC6A+2czKGRcYMfm8gdnk+0de99TDDzsqo0v5nb -RsbUmMcdRQ7nsMbRWe0SAb/9QoLTZ/cJ0iOBqdrkz7UpqqKarVoTSdlSMVM92tWp -3bJncZHQD1t4xd6lQVdI1/T6R+5J0T1ukOdsI9Jmf+F28S6g3R3L1SFwiHKeZKZv -z+793wIDAQABo4ICRzCCAkMwggIpBgNVHQMBAf8EggIdMIICGTCCAhUwggIRBgtg -hkgBhvhFAQcBATCCAgAWggGrVGhpcyBjZXJ0aWZpY2F0ZSBpbmNvcnBvcmF0ZXMg -YnkgcmVmZXJlbmNlLCBhbmQgaXRzIHVzZSBpcyBzdHJpY3RseSBzdWJqZWN0IHRv -LCB0aGUgVmVyaVNpZ24gQ2VydGlmaWNhdGlvbiBQcmFjdGljZSBTdGF0ZW1lbnQg -KENQUyksIGF2YWlsYWJsZSBhdDogaHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL0NQ -Uy0xLjA7IGJ5IEUtbWFpbCBhdCBDUFMtcmVxdWVzdHNAdmVyaXNpZ24uY29tOyBv -ciBieSBtYWlsIGF0IFZlcmlTaWduLCBJbmMuLCAyNTkzIENvYXN0IEF2ZS4sIE1v -dW50YWluIFZpZXcsIENBIDk0MDQzIFVTQSBUZWwuICsxICg0MTUpIDk2MS04ODMw -IENvcHlyaWdodCAoYykgMTk5NiBWZXJpU2lnbiwgSW5jLiAgQWxsIFJpZ2h0cyBS -ZXNlcnZlZC4gQ0VSVEFJTiBXQVJSQU5USUVTIERJU0NMQUlNRUQgYW5kIExJQUJJ -TElUWSBMSU1JVEVELqAOBgxghkgBhvhFAQcBAQGhDgYMYIZIAYb4RQEHAQECMC8w -LRYraHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL3JlcG9zaXRvcnkvQ1BTLTEuMDAU -BglghkgBhvhCAQEBAf8EBAMCAgQwDQYJKoZIhvcNAQECBQADgYEApRJRkNBqLLgs -53IR/d18ODdLOWMTZ+QOOxBrq460iBEdUwgF8vmPRX1ku7UiDeNzaLlurE6eFqHq -2zPyK5j60zfTLVJMWKcQWwTJLjHtXrW8pxhNtFc6Fdvy5ZkHnC/9NIl7/t4U6WqB -p4y+p7SdMIkEwIZfds0VbnQyX5MRUJY= ------END CERTIFICATE----- - - subject=/C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 3 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKhAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAyVxZ -nvIbigEUtBDfBEDb41evakVAj4QMC9Ez2dkRz+4CWB8l9yqoRAWq7AMfeH+ek7ma -AKojfdashaJjRcdyJ8z0TMZ1cdI5709C8HXfCpDGjiBvmA/4rCNfcCk2pMmG57Ga -IMtTpYXnPb59mv4kRTPcdhXtD6JxZExlLoFoRacCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQB1Zmw+0c2B27X4LzZRtvdCvM1Cr9wO+hVs+GeTVzrrtpLotgHKjLeOQ7RJ -Zfk+7r11Ri7J/CVdqMcvi5uPaM+0nJcYwE3vH9mvgrPmZLiEXIqaB1JDYft0nls6 -NvxMsvwaPxUupVs8G5DsiCnkWRb5zget7Ond2tIxik/W2O8XjQ== ------END CERTIFICATE----- - subject=/C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority - issuer= /C=US/O=VeriSign, Inc./OU=Class 4 Public Primary Certification Authority ------BEGIN CERTIFICATE----- -MIICMTCCAZoCBQKmAAABMA0GCSqGSIb3DQEBAgUAMF8xCzAJBgNVBAYTAlVTMRcw -FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMg -UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05NjAxMjkwMDAwMDBa -Fw05OTEyMzEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2ln -biwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgNCBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZp -Y2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0LJ1 -9njQrlpQ9OlQqZ+M1++RlHDo0iSQdomF1t+s5gEXMoDwnZNHvJplnR+Xrr/phnVj -IIm9gFidBAydqMEk6QvlMXi9/C0MN2qeeIDpRnX57aP7E3vIwUzSo+/1PLBij0pd -O92VZ48TucE81qcmm+zDO3rZTbxtm+gVAePwR6kCAwEAATANBgkqhkiG9w0BAQIF -AAOBgQBT3dPwnCR+QKri/AAa19oM/DJhuBUNlvP6Vxt/M3yv6ZiaYch6s7f/sdyZ -g9ysEvxwyR84Qu1E9oAuW2szaayc01znX1oYx7EteQSWQZGZQbE8DbqEOcY7l/Am -yY7uvcxClf8exwI/VAx49byqYHwCaejcrOICdmHEPgPq0ook0Q== ------END CERTIFICATE----- diff --git a/app/openssl/apps/sess_id.c b/app/openssl/apps/sess_id.c index b99179f2..b16686c2 100644 --- a/app/openssl/apps/sess_id.c +++ b/app/openssl/apps/sess_id.c @@ -90,6 +90,7 @@ int MAIN(int, char **); int MAIN(int argc, char **argv) { SSL_SESSION *x=NULL; + X509 *peer = NULL; int ret=1,i,num,badops=0; BIO *out=NULL; int informat,outformat; @@ -163,16 +164,17 @@ bad: ERR_load_crypto_strings(); x=load_sess_id(infile,informat); if (x == NULL) { goto end; } + peer = SSL_SESSION_get0_peer(x); if(context) { - x->sid_ctx_length=strlen(context); - if(x->sid_ctx_length > SSL_MAX_SID_CTX_LENGTH) + size_t ctx_len = strlen(context); + if(ctx_len > SSL_MAX_SID_CTX_LENGTH) { BIO_printf(bio_err,"Context too long\n"); goto end; } - memcpy(x->sid_ctx,context,x->sid_ctx_length); + SSL_SESSION_set1_id_context(x, (unsigned char *)context, ctx_len); } #ifdef undef @@ -231,10 +233,10 @@ bad: if (cert) { - if (x->peer == NULL) + if (peer == NULL) BIO_puts(out,"No certificate present\n"); else - X509_print(out,x->peer); + X509_print(out,peer); } } @@ -253,12 +255,12 @@ bad: goto end; } } - else if (!noout && (x->peer != NULL)) /* just print the certificate */ + else if (!noout && (peer != NULL)) /* just print the certificate */ { if (outformat == FORMAT_ASN1) - i=(int)i2d_X509_bio(out,x->peer); + i=(int)i2d_X509_bio(out,peer); else if (outformat == FORMAT_PEM) - i=PEM_write_bio_X509(out,x->peer); + i=PEM_write_bio_X509(out,peer); else { BIO_printf(bio_err,"bad output format specified for outfile\n"); goto end; diff --git a/app/openssl/apps/speed.c b/app/openssl/apps/speed.c index b3c54424..9c251ebe 100644 --- a/app/openssl/apps/speed.c +++ b/app/openssl/apps/speed.c @@ -108,8 +108,14 @@ #include #endif -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) #include +# if defined(__CYGWIN__) && !defined(_WIN32) + /* should define _WIN32, which normally is mutually + * exclusive with __CYGWIN__, but if it didn't... */ +# define _WIN32 + /* this is done because Cygwin alarm() fails sometimes. */ +# endif #endif #include @@ -183,6 +189,25 @@ #ifndef OPENSSL_NO_ECDH #include #endif +#include + +#ifdef OPENSSL_FIPS +#ifdef OPENSSL_DOING_MAKEDEPEND +#undef AES_set_encrypt_key +#undef AES_set_decrypt_key +#undef DES_set_key_unchecked +#endif +#define BF_set_key private_BF_set_key +#define CAST_set_key private_CAST_set_key +#define idea_set_encrypt_key private_idea_set_encrypt_key +#define SEED_set_key private_SEED_set_key +#define RC2_set_key private_RC2_set_key +#define RC4_set_key private_RC4_set_key +#define DES_set_key_unchecked private_DES_set_key_unchecked +#define AES_set_encrypt_key private_AES_set_encrypt_key +#define AES_set_decrypt_key private_AES_set_decrypt_key +#define Camellia_set_key private_Camellia_set_key +#endif #ifndef HAVE_FORK # if defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_MACINTOSH_CLASSIC) || defined(OPENSSL_SYS_OS2) || defined(OPENSSL_SYS_NETWARE) @@ -214,7 +239,7 @@ static void print_result(int alg,int run_no,int count,double time_used); static int do_multi(int multi); #endif -#define ALGOR_NUM 29 +#define ALGOR_NUM 30 #define SIZE_NUM 5 #define RSA_NUM 4 #define DSA_NUM 3 @@ -229,7 +254,7 @@ static const char *names[ALGOR_NUM]={ "aes-128 cbc","aes-192 cbc","aes-256 cbc", "camellia-128 cbc","camellia-192 cbc","camellia-256 cbc", "evp","sha256","sha512","whirlpool", - "aes-128 ige","aes-192 ige","aes-256 ige"}; + "aes-128 ige","aes-192 ige","aes-256 ige","ghash" }; static double results[ALGOR_NUM][SIZE_NUM]; static int lengths[SIZE_NUM]={16,64,256,1024,8*1024}; #ifndef OPENSSL_NO_RSA @@ -273,9 +298,12 @@ static SIGRETTYPE sig_done(int sig) #if defined(_WIN32) -#define SIGALRM +#if !defined(SIGALRM) +# define SIGALRM +#endif static unsigned int lapse,schlock; -static void alarm(unsigned int secs) { lapse = secs*1000; } +static void alarm_win32(unsigned int secs) { lapse = secs*1000; } +#define alarm alarm_win32 static DWORD WINAPI sleepy(VOID *arg) { @@ -469,6 +497,7 @@ int MAIN(int argc, char **argv) #define D_IGE_128_AES 26 #define D_IGE_192_AES 27 #define D_IGE_256_AES 28 +#define D_GHASH 29 double d=0.0; long c[ALGOR_NUM][SIZE_NUM]; #define R_DSA_512 0 @@ -894,6 +923,10 @@ int MAIN(int argc, char **argv) doit[D_CBC_192_AES]=1; doit[D_CBC_256_AES]=1; } + else if (strcmp(*argv,"ghash") == 0) + { + doit[D_GHASH]=1; + } else #endif #ifndef OPENSSL_NO_CAMELLIA @@ -1264,6 +1297,7 @@ int MAIN(int argc, char **argv) c[D_IGE_128_AES][0]=count; c[D_IGE_192_AES][0]=count; c[D_IGE_256_AES][0]=count; + c[D_GHASH][0]=count; for (i=1; i + +#ifndef OPENSSL_NO_SRP +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "apps.h" + +#undef PROG +#define PROG srp_main + +#define BASE_SECTION "srp" +#define CONFIG_FILE "openssl.cnf" + +#define ENV_RANDFILE "RANDFILE" + +#define ENV_DATABASE "srpvfile" +#define ENV_DEFAULT_SRP "default_srp" + +static char *srp_usage[]={ +"usage: srp [args] [user] \n", +"\n", +" -verbose Talk alot while doing things\n", +" -config file A config file\n", +" -name arg The particular srp definition to use\n", +" -srpvfile arg The srp verifier file name\n", +" -add add an user and srp verifier\n", +" -modify modify the srp verifier of an existing user\n", +" -delete delete user from verifier file\n", +" -list list user\n", +" -gn arg g and N values to be used for new verifier\n", +" -userinfo arg additional info to be set for user\n", +" -passin arg input file pass phrase source\n", +" -passout arg output file pass phrase source\n", +#ifndef OPENSSL_NO_ENGINE +" -engine e - use engine e, possibly a hardware device.\n", +#endif +NULL +}; + +#ifdef EFENCE +extern int EF_PROTECT_FREE; +extern int EF_PROTECT_BELOW; +extern int EF_ALIGNMENT; +#endif + +static CONF *conf=NULL; +static char *section=NULL; + +#define VERBOSE if (verbose) +#define VVERBOSE if (verbose>1) + + +int MAIN(int, char **); + +static int get_index(CA_DB *db, char* id, char type) + { + char ** pp; + int i; + if (id == NULL) return -1; + if (type == DB_SRP_INDEX) + for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++) + { + pp = sk_OPENSSL_PSTRING_value(db->db->data,i); + if (pp[DB_srptype][0] == DB_SRP_INDEX && !strcmp(id,pp[DB_srpid])) + return i; + } + else for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++) + { + pp = sk_OPENSSL_PSTRING_value(db->db->data,i); + + if (pp[DB_srptype][0] != DB_SRP_INDEX && !strcmp(id,pp[DB_srpid])) + return i; + } + + return -1 ; + } + +static void print_entry(CA_DB *db, BIO *bio, int indx, int verbose, char *s) + { + if (indx >= 0 && verbose) + { + int j; + char **pp = sk_OPENSSL_PSTRING_value(db->db->data, indx); + BIO_printf(bio, "%s \"%s\"\n", s, pp[DB_srpid]); + for (j = 0; j < DB_NUMBER; j++) + { + BIO_printf(bio_err," %d = \"%s\"\n", j, pp[j]); + } + } + } + +static void print_index(CA_DB *db, BIO *bio, int indexindex, int verbose) + { + print_entry(db, bio, indexindex, verbose, "g N entry") ; + } + +static void print_user(CA_DB *db, BIO *bio, int userindex, int verbose) + { + if (verbose > 0) + { + char **pp = sk_OPENSSL_PSTRING_value(db->db->data,userindex); + + if (pp[DB_srptype][0] != 'I') + { + print_entry(db, bio, userindex, verbose, "User entry"); + print_entry(db, bio, get_index(db, pp[DB_srpgN], 'I'), verbose, "g N entry"); + } + + } + } + +static int update_index(CA_DB *db, BIO *bio, char **row) + { + char ** irow; + int i; + + if ((irow=(char **)OPENSSL_malloc(sizeof(char *)*(DB_NUMBER+1))) == NULL) + { + BIO_printf(bio_err,"Memory allocation failure\n"); + return 0; + } + + for (i=0; idb,irow)) + { + BIO_printf(bio,"failed to update srpvfile\n"); + BIO_printf(bio,"TXT_DB error number %ld\n",db->db->error); + OPENSSL_free(irow); + return 0; + } + return 1; + } + +static void lookup_fail(const char *name, char *tag) + { + BIO_printf(bio_err,"variable lookup failed for %s::%s\n",name,tag); + } + + +static char *srp_verify_user(const char *user, const char *srp_verifier, + char *srp_usersalt, const char *g, const char *N, + const char *passin, BIO *bio, int verbose) + { + char password[1024]; + PW_CB_DATA cb_tmp; + char *verifier = NULL; + char *gNid = NULL; + + cb_tmp.prompt_info = user; + cb_tmp.password = passin; + + if (password_callback(password, 1024, 0, &cb_tmp) >0) + { + VERBOSE BIO_printf(bio,"Validating\n user=\"%s\"\n srp_verifier=\"%s\"\n srp_usersalt=\"%s\"\n g=\"%s\"\n N=\"%s\"\n",user,srp_verifier,srp_usersalt, g, N); + BIO_printf(bio, "Pass %s\n", password); + + if (!(gNid=SRP_create_verifier(user, password, &srp_usersalt, &verifier, N, g))) + { + BIO_printf(bio, "Internal error validating SRP verifier\n"); + } + else + { + if (strcmp(verifier, srp_verifier)) + gNid = NULL; + OPENSSL_free(verifier); + } + } + return gNid; + } + +static char *srp_create_user(char *user, char **srp_verifier, + char **srp_usersalt, char *g, char *N, + char *passout, BIO *bio, int verbose) + { + char password[1024]; + PW_CB_DATA cb_tmp; + char *gNid = NULL; + char *salt = NULL; + cb_tmp.prompt_info = user; + cb_tmp.password = passout; + + if (password_callback(password,1024,1,&cb_tmp) >0) + { + VERBOSE BIO_printf(bio,"Creating\n user=\"%s\"\n g=\"%s\"\n N=\"%s\"\n",user,g,N); + if (!(gNid =SRP_create_verifier(user, password, &salt, srp_verifier, N, g))) + { + BIO_printf(bio,"Internal error creating SRP verifier\n"); + } + else + *srp_usersalt = salt; + VVERBOSE BIO_printf(bio,"gNid=%s salt =\"%s\"\n verifier =\"%s\"\n", gNid,salt, *srp_verifier); + + } + return gNid; + } + +int MAIN(int argc, char **argv) + { + int add_user = 0; + int list_user= 0; + int delete_user= 0; + int modify_user= 0; + char * user = NULL; + + char *passargin = NULL, *passargout = NULL; + char *passin = NULL, *passout = NULL; + char * gN = NULL; + int gNindex = -1; + char ** gNrow = NULL; + int maxgN = -1; + + char * userinfo = NULL; + + int badops=0; + int ret=1; + int errors=0; + int verbose=0; + int doupdatedb=0; + char *configfile=NULL; + char *dbfile=NULL; + CA_DB *db=NULL; + char **pp ; + int i; + long errorline = -1; + char *randfile=NULL; +#ifndef OPENSSL_NO_ENGINE + char *engine = NULL; +#endif + char *tofree=NULL; + DB_ATTR db_attr; + +#ifdef EFENCE +EF_PROTECT_FREE=1; +EF_PROTECT_BELOW=1; +EF_ALIGNMENT=0; +#endif + + apps_startup(); + + conf = NULL; + section = NULL; + + if (bio_err == NULL) + if ((bio_err=BIO_new(BIO_s_file())) != NULL) + BIO_set_fp(bio_err,stderr,BIO_NOCLOSE|BIO_FP_TEXT); + + argc--; + argv++; + while (argc >= 1 && badops == 0) + { + if (strcmp(*argv,"-verbose") == 0) + verbose++; + else if (strcmp(*argv,"-config") == 0) + { + if (--argc < 1) goto bad; + configfile= *(++argv); + } + else if (strcmp(*argv,"-name") == 0) + { + if (--argc < 1) goto bad; + section= *(++argv); + } + else if (strcmp(*argv,"-srpvfile") == 0) + { + if (--argc < 1) goto bad; + dbfile= *(++argv); + } + else if (strcmp(*argv,"-add") == 0) + add_user=1; + else if (strcmp(*argv,"-delete") == 0) + delete_user=1; + else if (strcmp(*argv,"-modify") == 0) + modify_user=1; + else if (strcmp(*argv,"-list") == 0) + list_user=1; + else if (strcmp(*argv,"-gn") == 0) + { + if (--argc < 1) goto bad; + gN= *(++argv); + } + else if (strcmp(*argv,"-userinfo") == 0) + { + if (--argc < 1) goto bad; + userinfo= *(++argv); + } + else if (strcmp(*argv,"-passin") == 0) + { + if (--argc < 1) goto bad; + passargin= *(++argv); + } + else if (strcmp(*argv,"-passout") == 0) + { + if (--argc < 1) goto bad; + passargout= *(++argv); + } +#ifndef OPENSSL_NO_ENGINE + else if (strcmp(*argv,"-engine") == 0) + { + if (--argc < 1) goto bad; + engine= *(++argv); + } +#endif + + else if (**argv == '-') + { +bad: + BIO_printf(bio_err,"unknown option %s\n",*argv); + badops=1; + break; + } + else + break; + + argc--; + argv++; + } + + if (dbfile && configfile) + { + BIO_printf(bio_err,"-dbfile and -configfile cannot be specified together.\n"); + badops = 1; + } + if (add_user+delete_user+modify_user+list_user != 1) + { + BIO_printf(bio_err,"Exactly one of the options -add, -delete, -modify -list must be specified.\n"); + badops = 1; + } + if (delete_user+modify_user+delete_user== 1 && argc <= 0) + { + BIO_printf(bio_err,"Need at least one user for options -add, -delete, -modify. \n"); + badops = 1; + } + if ((passin || passout) && argc != 1 ) + { + BIO_printf(bio_err,"-passin, -passout arguments only valid with one user.\n"); + badops = 1; + } + + if (badops) + { + for (pp=srp_usage; (*pp != NULL); pp++) + BIO_printf(bio_err,"%s",*pp); + + BIO_printf(bio_err," -rand file%cfile%c...\n", LIST_SEPARATOR_CHAR, LIST_SEPARATOR_CHAR); + BIO_printf(bio_err," load the file (or the files in the directory) into\n"); + BIO_printf(bio_err," the random number generator\n"); + goto err; + } + + ERR_load_crypto_strings(); + +#ifndef OPENSSL_NO_ENGINE + setup_engine(bio_err, engine, 0); +#endif + + if(!app_passwd(bio_err, passargin, passargout, &passin, &passout)) + { + BIO_printf(bio_err, "Error getting passwords\n"); + goto err; + } + + if (!dbfile) + { + + + /*****************************************************************/ + tofree=NULL; + if (configfile == NULL) configfile = getenv("OPENSSL_CONF"); + if (configfile == NULL) configfile = getenv("SSLEAY_CONF"); + if (configfile == NULL) + { + const char *s=X509_get_default_cert_area(); + size_t len; + +#ifdef OPENSSL_SYS_VMS + len = strlen(s)+sizeof(CONFIG_FILE); + tofree=OPENSSL_malloc(len); + strcpy(tofree,s); +#else + len = strlen(s)+sizeof(CONFIG_FILE)+1; + tofree=OPENSSL_malloc(len); + BUF_strlcpy(tofree,s,len); + BUF_strlcat(tofree,"/",len); +#endif + BUF_strlcat(tofree,CONFIG_FILE,len); + configfile=tofree; + } + + VERBOSE BIO_printf(bio_err,"Using configuration from %s\n",configfile); + conf = NCONF_new(NULL); + if (NCONF_load(conf,configfile,&errorline) <= 0) + { + if (errorline <= 0) + BIO_printf(bio_err,"error loading the config file '%s'\n", + configfile); + else + BIO_printf(bio_err,"error on line %ld of config file '%s'\n" + ,errorline,configfile); + goto err; + } + if(tofree) + { + OPENSSL_free(tofree); + tofree = NULL; + } + + if (!load_config(bio_err, conf)) + goto err; + + /* Lets get the config section we are using */ + if (section == NULL) + { + VERBOSE BIO_printf(bio_err,"trying to read " ENV_DEFAULT_SRP " in \" BASE_SECTION \"\n"); + + section=NCONF_get_string(conf,BASE_SECTION,ENV_DEFAULT_SRP); + if (section == NULL) + { + lookup_fail(BASE_SECTION,ENV_DEFAULT_SRP); + goto err; + } + } + + if (randfile == NULL && conf) + randfile = NCONF_get_string(conf, BASE_SECTION, "RANDFILE"); + + + VERBOSE BIO_printf(bio_err,"trying to read " ENV_DATABASE " in section \"%s\"\n",section); + + if ((dbfile=NCONF_get_string(conf,section,ENV_DATABASE)) == NULL) + { + lookup_fail(section,ENV_DATABASE); + goto err; + } + + } + if (randfile == NULL) + ERR_clear_error(); + else + app_RAND_load_file(randfile, bio_err, 0); + + VERBOSE BIO_printf(bio_err,"Trying to read SRP verifier file \"%s\"\n",dbfile); + + db = load_index(dbfile, &db_attr); + if (db == NULL) goto err; + + /* Lets check some fields */ + for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++) + { + pp = sk_OPENSSL_PSTRING_value(db->db->data, i); + + if (pp[DB_srptype][0] == DB_SRP_INDEX) + { + maxgN = i; + if (gNindex < 0 && gN != NULL && !strcmp(gN, pp[DB_srpid])) + gNindex = i; + + print_index(db, bio_err, i, verbose > 1); + } + } + + VERBOSE BIO_printf(bio_err, "Database initialised\n"); + + if (gNindex >= 0) + { + gNrow = sk_OPENSSL_PSTRING_value(db->db->data,gNindex); + print_entry(db, bio_err, gNindex, verbose > 1, "Default g and N"); + } + else if (maxgN > 0 && !SRP_get_default_gN(gN)) + { + BIO_printf(bio_err, "No g and N value for index \"%s\"\n", gN); + goto err; + } + else + { + VERBOSE BIO_printf(bio_err, "Database has no g N information.\n"); + gNrow = NULL; + } + + + VVERBOSE BIO_printf(bio_err,"Starting user processing\n"); + + if (argc > 0) + user = *(argv++) ; + + while (list_user || user) + { + int userindex = -1; + if (user) + VVERBOSE BIO_printf(bio_err, "Processing user \"%s\"\n", user); + if ((userindex = get_index(db, user, 'U')) >= 0) + { + print_user(db, bio_err, userindex, (verbose > 0) || list_user); + } + + if (list_user) + { + if (user == NULL) + { + BIO_printf(bio_err,"List all users\n"); + + for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++) + { + print_user(db,bio_err, i, 1); + } + list_user = 0; + } + else if (userindex < 0) + { + BIO_printf(bio_err, "user \"%s\" does not exist, ignored. t\n", + user); + errors++; + } + } + else if (add_user) + { + if (userindex >= 0) + { + /* reactivation of a new user */ + char **row = sk_OPENSSL_PSTRING_value(db->db->data, userindex); + BIO_printf(bio_err, "user \"%s\" reactivated.\n", user); + row[DB_srptype][0] = 'V'; + + doupdatedb = 1; + } + else + { + char *row[DB_NUMBER] ; char *gNid; + row[DB_srpverifier] = NULL; + row[DB_srpsalt] = NULL; + row[DB_srpinfo] = NULL; + if (!(gNid = srp_create_user(user,&(row[DB_srpverifier]), &(row[DB_srpsalt]),gNrow?gNrow[DB_srpsalt]:gN,gNrow?gNrow[DB_srpverifier]:NULL, passout, bio_err,verbose))) + { + BIO_printf(bio_err, "Cannot create srp verifier for user \"%s\", operation abandoned .\n", user); + errors++; + goto err; + } + row[DB_srpid] = BUF_strdup(user); + row[DB_srptype] = BUF_strdup("v"); + row[DB_srpgN] = BUF_strdup(gNid); + + if (!row[DB_srpid] || !row[DB_srpgN] || !row[DB_srptype] || !row[DB_srpverifier] || !row[DB_srpsalt] || + (userinfo && (!(row[DB_srpinfo] = BUF_strdup(userinfo)))) || + !update_index(db, bio_err, row)) + { + if (row[DB_srpid]) OPENSSL_free(row[DB_srpid]); + if (row[DB_srpgN]) OPENSSL_free(row[DB_srpgN]); + if (row[DB_srpinfo]) OPENSSL_free(row[DB_srpinfo]); + if (row[DB_srptype]) OPENSSL_free(row[DB_srptype]); + if (row[DB_srpverifier]) OPENSSL_free(row[DB_srpverifier]); + if (row[DB_srpsalt]) OPENSSL_free(row[DB_srpsalt]); + goto err; + } + doupdatedb = 1; + } + } + else if (modify_user) + { + if (userindex < 0) + { + BIO_printf(bio_err,"user \"%s\" does not exist, operation ignored.\n",user); + errors++; + } + else + { + + char **row = sk_OPENSSL_PSTRING_value(db->db->data, userindex); + char type = row[DB_srptype][0]; + if (type == 'v') + { + BIO_printf(bio_err,"user \"%s\" already updated, operation ignored.\n",user); + errors++; + } + else + { + char *gNid; + + if (row[DB_srptype][0] == 'V') + { + int user_gN; + char **irow = NULL; + VERBOSE BIO_printf(bio_err,"Verifying password for user \"%s\"\n",user); + if ( (user_gN = get_index(db, row[DB_srpgN], DB_SRP_INDEX)) >= 0) + irow = (char **)sk_OPENSSL_PSTRING_value(db->db->data, userindex); + + if (!srp_verify_user(user, row[DB_srpverifier], row[DB_srpsalt], irow ? irow[DB_srpsalt] : row[DB_srpgN], irow ? irow[DB_srpverifier] : NULL, passin, bio_err, verbose)) + { + BIO_printf(bio_err, "Invalid password for user \"%s\", operation abandoned.\n", user); + errors++; + goto err; + } + } + VERBOSE BIO_printf(bio_err,"Password for user \"%s\" ok.\n",user); + + if (!(gNid=srp_create_user(user,&(row[DB_srpverifier]), &(row[DB_srpsalt]),gNrow?gNrow[DB_srpsalt]:NULL, gNrow?gNrow[DB_srpverifier]:NULL, passout, bio_err,verbose))) + { + BIO_printf(bio_err, "Cannot create srp verifier for user \"%s\", operation abandoned.\n", user); + errors++; + goto err; + } + + row[DB_srptype][0] = 'v'; + row[DB_srpgN] = BUF_strdup(gNid); + + if (!row[DB_srpid] || !row[DB_srpgN] || !row[DB_srptype] || !row[DB_srpverifier] || !row[DB_srpsalt] || + (userinfo && (!(row[DB_srpinfo] = BUF_strdup(userinfo))))) + goto err; + + doupdatedb = 1; + } + } + } + else if (delete_user) + { + if (userindex < 0) + { + BIO_printf(bio_err, "user \"%s\" does not exist, operation ignored. t\n", user); + errors++; + } + else + { + char **xpp = sk_OPENSSL_PSTRING_value(db->db->data,userindex); + BIO_printf(bio_err, "user \"%s\" revoked. t\n", user); + + xpp[DB_srptype][0] = 'R'; + + doupdatedb = 1; + } + } + if (--argc > 0) + user = *(argv++) ; + else + { + user = NULL; + list_user = 0; + } + } + + VERBOSE BIO_printf(bio_err,"User procession done.\n"); + + + if (doupdatedb) + { + /* Lets check some fields */ + for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++) + { + pp = sk_OPENSSL_PSTRING_value(db->db->data,i); + + if (pp[DB_srptype][0] == 'v') + { + pp[DB_srptype][0] = 'V'; + print_user(db, bio_err, i, verbose); + } + } + + VERBOSE BIO_printf(bio_err, "Trying to update srpvfile.\n"); + if (!save_index(dbfile, "new", db)) goto err; + + VERBOSE BIO_printf(bio_err, "Temporary srpvfile created.\n"); + if (!rotate_index(dbfile, "new", "old")) goto err; + + VERBOSE BIO_printf(bio_err, "srpvfile updated.\n"); + } + + ret = (errors != 0); +err: + if (errors != 0) + VERBOSE BIO_printf(bio_err,"User errors %d.\n",errors); + + VERBOSE BIO_printf(bio_err,"SRP terminating with code %d.\n",ret); + if(tofree) + OPENSSL_free(tofree); + if (ret) ERR_print_errors(bio_err); + if (randfile) app_RAND_write_file(randfile, bio_err); + if (conf) NCONF_free(conf); + if (db) free_index(db); + + OBJ_cleanup(); + apps_shutdown(); + OPENSSL_EXIT(ret); + } + + + +#endif + diff --git a/app/openssl/apps/verify.c b/app/openssl/apps/verify.c index 9163997e..893670ff 100644 --- a/app/openssl/apps/verify.c +++ b/app/openssl/apps/verify.c @@ -222,25 +222,37 @@ int MAIN(int argc, char **argv) goto end; } - if (argc < 1) check(cert_ctx, NULL, untrusted, trusted, crls, e); + ret = 0; + if (argc < 1) + { + if (1 != check(cert_ctx, NULL, untrusted, trusted, crls, e)) + ret = -1; + } else + { for (i=0; i= $1 +# $1+: message. +dump_n () { + local LOG_LEVEL=$1 + shift + if [ "$VERBOSE" -ge "$LOG_LEVEL" ]; then + printf "%s\n" "$@" + fi +} + +# Dump a message unless --quiet is used. +# $1+: message. +dump () { + dump_n 1 "$@" +} + +# Dump a message if --verbose is used only. +# $1+: message. +log () { + dump_n 2 "$@" +} + +# Run a command silently, unless --verbose or '--verbose --verbose' +# is used. +# $1+: Command +# Return: command status. +run () { + log "COMMAND: $*" + case $VERBOSE in + 0) + "$@" >/dev/null 2>&1 || return $? + ;; + 1) + "$@" >/dev/null || return $? + ;; + *) + "$@" || return $? + ;; + esac +} + +# $1: string +# Out: input string, with capital letters replaced by small ones. +tolower () { + echo "$1" | tr '[A-Z]' '[a-z]' +} + +# Return value of a given variable. +# $1: Variable name +var_value () { + eval printf \"%s\" \"\$$1\" +} + +# Remove some items from a list +# $1: input space-separated list +# $2: space-separated list of items to remove from 1 +# Out: items of $1 without items of $2 +filter_out () { + local TMP=$(mktemp) + local RESULT + printf "" > $TMP + echo "$2" | tr ' ' '\n' > $TMP + RESULT=$(echo "$1" | tr ' ' '\n' | fgrep -x -v -f $TMP | tr '\n' ' ') + rm -f $TMP + echo "$RESULT" +} + +src_to_obj () { + case $1 in + *.c) + echo ${1%%.c}.o + ;; + *.S) + echo ${1%%.S}.o + ;; + *) + echo $1 + ;; + esac +} + +# Determine host operating system. +HOST_OS=$(uname -s) +case $HOST_OS in + Linux) + HOST_OS=linux + ;; + Darwin) + HOST_OS=darwin + ;; +esac + +# Determine host architecture +HOST_ARCH=$(uname -m) +case $HOST_ARCH in + i?86) + HOST_ARCH=x86 + ;; +esac + +ANDROID_HOST_TAG=$HOST_OS-$HOST_ARCH + +case $ANDROID_HOST_TAG in + linux-x86_64|darwin-x86-64) + ANDROID_HOST_TAG=$HOST_OS-x86 + ;; + *) + panic "Sorry, this script can only run on 64-bit Linux or Darwin" +esac + +# Determine number of cores +case $HOST_OS in + linux) + NUM_CORES=$(grep -c "processor" /proc/cpuinfo) + ;; + darwin) + NUM_CORES=$(sysctl -n hw.ncpu) + ;; + *) + NUM_CORES=1 + ;; +esac + +# The list of supported Android target architectures. + +# NOTE: x86_64 is not ready yet, while the toolchain is in +# prebuilts/ it doesn't have a sysroot which means it requires +# a platform build to get Bionic and stuff. +ANDROID_ARCHS="arm x86 mips" + +BUILD_TYPES= +for ARCH in $ANDROID_ARCHS; do + BUILD_TYPES="$BUILD_TYPES android-$ARCH" +done +ANDROID_BUILD_TYPES=$BUILD_TYPES + +HOST_BUILD_TYPES="$HOST_OS-x86 $HOST_OS-generic32 $HOST_OS-generic64" +HOST_BUILD_TYPES="$HOST_BUILD_TYPES $HOST_OS-x86_64" + +BUILD_TYPES="$ANDROID_BUILD_TYPES $HOST_BUILD_TYPES" + +# Parse command-line +DO_HELP= +SRC_DIR=$(cd $PROGDIR && pwd) +OUT_DIR=out +BUILD_DIR= +BUILD_TYPES= +NUM_JOBS=$NUM_CORES +ANDROID_BUILD_TOP=$(cd $PROGDIR/../.. && pwd) +for OPT; do + case $OPT in + --help|-h|-?) + DO_HELP=true + ;; + --build-dir=*) + BUILD_DIR=${OPT##--build-dir=} + ;; + --verbose) + VERBOSE=$(( $VERBOSE + 1 )) + ;; + --jobs=*) + NUM_JOBS=${OPT##--jobs=} + ;; + --quiet) + VERBOSE=$(( $VERBOSE - 1 )) + ;; + -j*) + NUM_JOBS=${OPT##-j} + ;; + -*) + panic "Unknown option '$OPT', see --help for details." + ;; + *) + BUILD_TYPES="$BUILD_TYPES $OPT" + ;; + esac +done + +# Print help when needed. +if [ "$DO_HELP" ]; then + echo \ +"Usage: $PROGNAME [options] [ ...] + +This script is used to ensure that all OpenSSL build variants compile +properly. It can be used after modifying external/openssl/openssl.config +and re-running import_openssl.sh to check that any changes didn't break +the build. + +A is a description of a given build of the library and its +program. Its format is: + + -- + +Where: is either 'gcc' or 'clang'. + is 'android', 'linux' or 'darwin'. + is 'arm', 'x86' or 'mips'. + +By default, it rebuilds the sources for the following build types: +" + for BUILD_TYPE in $BUILD_TYPES; do + echo " $BUILD_TYPE" + done + + echo \ +"However, you can pass custom values on the command-line instead. + +This scripts generates a custom Makefile in a temporary directory, then +launches 'make' in it to build all binaries in parallel. In case of +problem, you can use the --build-dir= option to specify a custom +build-directory, which will _not_ be removed when the script exits. + +For example, to better see why a build fails: + + ./$PROGNAME --build-dir=/tmp/mydir + make -C /tmp/mydir V=1 + +Valid options: + + --help|-h|-? Print this message. + --build-dir= Specify build directory. + --jobs= Run parallel build jobs [$NUM_JOBS]. + -j Same as --jobs=. + --verbose Increase verbosity. + --quiet Decrease verbosity. +" + exit 0 +fi + +log "Host OS: $HOST_OS" +log "Host arch: $HOST_ARCH" +log "Host CPU count: $NUM_CORES" + +if [ -z "$BUILD_TYPES" ]; then + BUILD_TYPES="$ANDROID_BUILD_TYPES $HOST_BUILD_TYPES" +fi +log "Build types: $BUILD_TYPES" + +if [ -z "$BUILD_DIR" ]; then + # Create a temporary directory, ensure it gets destroyed properly + # when the script exits. + BUILD_DIR=$(mktemp -d) + clean_build_dir () { + log "Cleaning up temporary directory: $BUILD_DIR" + rm -rf "$BUILD_DIR" + exit $1 + } + trap "clean_build_dir 0" EXIT + trap "clean_build_dir \$?" INT HUP QUIT TERM + log "Using temporary build directory: $BUILD_DIR" +else + log "Using user build directory: $BUILD_DIR" +fi + +mkdir -p "$BUILD_DIR" && rm -rf "$BUILD_DIR"/* + +MAKEFILE=$BUILD_DIR/GNUmakefile + +# Return source files for a given module and architecture. +# $1: module prefix (e.g. CRYPTO) +# $2: build arch. +get_module_src_files_for_arch () { + local prefix=$1 + local arch=$2 + local src_files="$(var_value OPENSSL_${prefix}_SOURCES)" + src_files="$src_files $(var_value OPENSSL_${prefix}_SOURCES_${arch})" + local exclude_files="$(var_value OPENSSL_${prefix}_SOURCES_EXCLUDES_${arch})" + src_files=$(filter_out "$src_files" "$exclude_files") + echo "$src_files" +} + +# Return the compiler defines for a given module and architecture +# $1: module prefix (e.g. CRYPTO) +# $2 build arch. +get_module_defines_for_arch () { + local prefix=$1 + local arch=$2 + local defines="$(var_value OPENSSL_${prefix}_DEFINES)" + defines="$defines $(var_value OPENSSL_${prefix}_DEFINES_${arch})" + echo "$defines" +} + +# $1: module prefix (e.g. CRYPTO) +get_module_c_includes () { + var_value OPENSSL_$1_INCLUDES +} + +# $1: build type (e.g. gcc-android-arm) +# Out: build arch. +get_build_arch () { + echo "$1" | cut -d- -f3 +} + +# $1: build arch +# Out: GNU configuration target (e.g. arm-linux-androideabi) +get_build_arch_target () { + case $1 in + arm) + echo "arm-linux-androideabi" + ;; + x86) + echo "i686-linux-android" + ;; + x86_64) + echo "x86_64-linux-android" + ;; + mips) + echo "mipsel-linux-android" + ;; + *) + echo "$1-linux-android" + ;; + esac +} + +GCC_VERSION=4.7 +CLANG_VERSION=3.1 + +get_prebuilt_gcc_dir_for_arch () { + local arch=$1 + local target=$(get_build_arch_target $arch) + # Adjust $arch for x86_64 because the prebuilts are actually + # under prebuilts/gcc//x86/ + case $arch in + x86_64) + arch=x86 + ;; + esac + echo "$ANDROID_BUILD_TOP/prebuilts/gcc/$ANDROID_HOST_TAG/$arch/$target-$GCC_VERSION" +} + +get_prebuilt_clang () { + echo "$ANDROID_BUILD_TOP/prebuilts/clang/$ANDROID_HOST_TAG/$CLANG_VERSION/clang" +} + +get_prebuilt_ndk_sysroot_for_arch () { + echo "$ANDROID_BUILD_TOP/prebuilts/ndk/current/platforms/android-9/arch-$1" +} + +get_c_runtime_file () { + local build_type=$1 + local arch=$(get_build_arch $build_type) + local filename=$2 + echo "$(get_prebuilt_ndk_sysroot_for_arch $arch)/usr/lib/$filename" +} + +# $1: build type (e.g. gcc-android-arm) +get_build_compiler () { + local arch=$(get_build_arch $1) + local target=$(get_build_arch_target $arch) + local gcc_dir=$(get_prebuilt_gcc_dir_for_arch $arch); + local result + + # Get the toolchain binary. + case $1 in + gcc-android-*) + result="$gcc_dir/bin/$target-gcc" + ;; + clang-android-*) + result="$(get_prebuilt_clang) -target $target -B$gcc_dir/$target/bin -I$gcc_dir/lib/gcc/$target/$GCC_VERSION/include" + ;; + gcc-*) + result=gcc + ;; + clang-*) # Must have host clang compiler. + result=clang + ;; + esac + + compiler_check=$(which $result 2>/dev/null || echo "") + if [ -z "$compiler_check" ]; then + panic "Could not find compiler: $result" + fi + + # Get the Android sysroot if needed. + case $1 in + *-android-*) + result="$result --sysroot=$(get_prebuilt_ndk_sysroot_for_arch $arch)" + ;; + esac + + # Force -m32 flag when needed for 32-bit builds. + case $1 in + *-linux-x86|*-darwin-x86|*-generic32) + result="$result -m32" + ;; + esac + echo "$result" +} + +# $1: build type. +# Out: common compiler flags for this build. +get_build_c_flags () { + local result="-O2 -fPIC" + case $1 in + *-android-arm) + result="$result -march=armv7-a -mfpu=vfpv3-d16" + ;; + esac + + case $1 in + *-generic32|*-generic64) + # Generic builds do not compile without this flag. + result="$result -DOPENSSL_NO_ASM" + ;; + esac + echo "$result" +} + +# $1: build type. +# Out: linker for this build. +get_build_linker () { + get_build_compiler $1 +} + +clear_sources () { + g_all_objs="" +} + +# Generate build instructions to compile source files. +# Also update g_all_objs. +# $1: module prefix (e.g. CRYPTO) +# $2: build type +build_sources () { + local prefix=$1 + local build_type=$2 + echo "## build_sources prefix='$prefix' build_type='$build_type'" + local arch=$(get_build_arch $build_type) + local src_files=$(get_module_src_files_for_arch $prefix $arch) + local c_defines=$(get_module_defines_for_arch $prefix $arch) + local c_includes=$(get_module_c_includes $prefix "$SRC_DIR") + local build_cc=$(get_build_compiler $build_type) + local build_cflags=$(get_build_c_flags $build_type) + local build_linker=$(get_build_linker $build_type) + local src obj def inc + + printf "OUT_DIR := $OUT_DIR/$build_type\n\n" + printf "BUILD_CC := $build_cc\n\n" + printf "BUILD_LINKER := $build_linker\n\n" + printf "BUILD_CFLAGS := $build_cflags" + for inc in $c_includes; do + printf " -I\$(SRC_DIR)/$inc" + done + for def in $c_defines; do + printf " -D$def" + done + printf "\n\n" + printf "BUILD_OBJECTS :=\n\n" + + case $build_type in + clang-android-*) + # The version of clang that comes with the platform build doesn't + # support simple linking of shared libraries and executables. One + # has to provide the C runtime files explicitely. + local crtbegin_so=$(get_c_runtime_file $build_type crtbegin_so.o) + local crtend_so=$(get_c_runtime_file $build_type crtend_so.o) + local crtbegin_exe=$(get_c_runtime_file $build_type crtbegin_dynamic.o) + local crtend_exe=$(get_c_runtime_file $build_type crtend_android.o) + printf "CRTBEGIN_SO := $crtbegin_so\n" + printf "CRTEND_SO := $crtend_so\n" + printf "CRTBEGIN_EXE := $crtbegin_exe\n" + printf "CRTEND_EXE := $crtend_exe\n" + printf "\n" + ;; + esac + + for src in $src_files; do + obj=$(src_to_obj $src) + g_all_objs="$g_all_objs $obj" + printf "OBJ := \$(OUT_DIR)/$obj\n" + printf "BUILD_OBJECTS += \$(OBJ)\n" + printf "\$(OBJ): PRIVATE_CC := \$(BUILD_CC)\n" + printf "\$(OBJ): PRIVATE_CFLAGS := \$(BUILD_CFLAGS)\n" + printf "\$(OBJ): \$(SRC_DIR)/$src\n" + printf "\t@echo [$build_type] CC $src\n" + printf "\t@mkdir -p \$\$(dirname \$@)\n" + printf "\t\$(hide) \$(PRIVATE_CC) \$(PRIVATE_CFLAGS) -c -o \$@ \$<\n" + printf "\n" + done + printf "\n" +} + +# $1: library name (e.g. crypto). +# $2: module prefix (e.g. CRYPTO). +# $3: build type. +# $4: source directory. +# $5: output directory. +build_shared_library () { + local name=$1 + local prefix=$2 + local build_type=$3 + local src_dir="$4" + local out_dir="$5" + local shlib="lib${name}.so" + local build_linker=$(get_build_linker $build_type) + clear_sources + build_sources $prefix $build_type + + # TODO(digit): Make the clang build link properly. + printf "SHLIB=\$(OUT_DIR)/$shlib\n" + printf "\$(SHLIB): PRIVATE_LINKER := \$(BUILD_LINKER)\n" + case $build_type in + clang-android-*) + printf "\$(SHLIB): PRIVATE_CRTBEGIN := \$(CRTBEGIN_SO)\n" + printf "\$(SHLIB): PRIVATE_CRTEND := \$(CRTEND_SO)\n" + ;; + esac + printf "\$(SHLIB): \$(BUILD_OBJECTS)\n" + printf "\t@echo [$build_type] SHARED_LIBRARY $(basename $shlib)\n" + printf "\t@mkdir -p \$\$(dirname \$@)\n" + case $build_type in + clang-android-*) + printf "\t\$(hide) \$(PRIVATE_LINKER) -nostdlib -shared -o \$@ \$(PRIVATE_CRTBEGIN) \$^ \$(PRIVATE_CRTEND)\n" + ;; + *) + printf "\t\$(hide) \$(PRIVATE_LINKER) -shared -o \$@ \$^\n" + ;; + esac + printf "\n" +} + +# $1: executable name. +# $2: module prefix (e.g. APPS). +# $3: build type. +# $4: source directory. +# $5: output directory. +# $6: dependent shared libraries (e.g. 'crypto ssl') +build_executable () { + local name=$1 + local prefix=$2 + local build_type=$3 + local src_dir="$4" + local out_dir="$5" + local shlibs="$6" + local build_linker=$(get_build_linker $build_type) + clear_sources + build_sources $prefix $build_type + + # TODO(digit): Make the clang build link properly. + exec=$name + all_shlibs= + printf "EXEC := \$(OUT_DIR)/$name\n" + printf "openssl_all: \$(EXEC)\n" + printf "\$(EXEC): PRIVATE_LINKER := \$(BUILD_LINKER)\n" + printf "\$(EXEC): \$(BUILD_OBJECTS)" + for lib in $shlibs; do + printf " \$(OUT_DIR)/lib${lib}.so" + done + printf "\n" + printf "\t@echo [$build_type] EXECUTABLE $name\n" + printf "\t@mkdir -p \$\$(dirname \$@)\n" + printf "\t\$(hide) \$(PRIVATE_LINKER) -o \$@ \$^\n" + printf "\n" +} + +ALL_BUILDS= + +generate_openssl_build () { + local build_type=$1 + local out="$OUT_DIR/$build_type" + ALL_BUILDS="$ALL_BUILDS $build_type" + echo "# Build type: $build_type" + build_shared_library crypto CRYPTO $build_type "$SRC_DIR" "$out" + build_shared_library ssl SSL $build_type "$SRC_DIR" "$out" + build_executable openssl APPS $build_type "$SRC_DIR" "$out" "crypto ssl" +} + +generate_makefile () { + echo \ +"# Auto-generated by $PROGDIR - do not edit + +.PHONY: openssl_all + +all: openssl_all + +# Use 'make V=1' to print build commands. +ifeq (1,\$(V)) +hide := +else +hide := @ +endif + +SRC_DIR=$SRC_DIR +OUT_DIR=$OUT_DIR +" + + for BUILD_TYPE in $BUILD_TYPES; do + generate_openssl_build gcc-$BUILD_TYPE + done + +# TODO(digit): Make the Clang build run. +# for BUILD_TYPE in $ANDROID_BUILD_TYPES; do +# generate_openssl_build clang-$BUILD_TYPE +# done +} + +. $SRC_DIR/openssl.config + + + +dump "Generating Makefile" +log "Makefile path: $MAKEFILE" +generate_makefile > $MAKEFILE + +dump "Building libraries with $NUM_JOBS jobs" +dump "For the following builds:" +for BUILD in $ALL_BUILDS; do + dump " $BUILD" +done +MAKE_FLAGS="-j$NUM_JOBS" +if [ "$VERBOSE" -gt 2 ]; then + MAKE_FLAGS="$MAKE_FLAGS V=1" +fi +run make $MAKE_FLAGS -f "$MAKEFILE" -C "$BUILD_DIR" +case $? in + 0) + dump "All OK, congratulations!" + ;; + *) + dump "Error, try doing the following to inspect the issues:" + dump " $PROGNAME --build-dir=/tmp/mybuild" + dump " make -C /tmp/mybuild V=1" + dump " " + ;; +esac diff --git a/app/openssl/crypto/Android.mk b/app/openssl/crypto/Android.mk deleted file mode 100644 index 70aef35d..00000000 --- a/app/openssl/crypto/Android.mk +++ /dev/null @@ -1,559 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -arm_cflags := -DOPENSSL_BN_ASM_MONT -DAES_ASM -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM -arm_src_files := \ - aes/asm/aes-armv4.s \ - bn/asm/armv4-mont.s \ - sha/asm/sha1-armv4-large.s \ - sha/asm/sha256-armv4.s \ - sha/asm/sha512-armv4.s -non_arm_src_files := aes/aes_core.c - -local_src_files := \ - cryptlib.c \ - mem.c \ - mem_clr.c \ - mem_dbg.c \ - cversion.c \ - ex_data.c \ - cpt_err.c \ - ebcdic.c \ - uid.c \ - o_time.c \ - o_str.c \ - o_dir.c \ - aes/aes_cbc.c \ - aes/aes_cfb.c \ - aes/aes_ctr.c \ - aes/aes_ecb.c \ - aes/aes_misc.c \ - aes/aes_ofb.c \ - aes/aes_wrap.c \ - asn1/a_bitstr.c \ - asn1/a_bool.c \ - asn1/a_bytes.c \ - asn1/a_d2i_fp.c \ - asn1/a_digest.c \ - asn1/a_dup.c \ - asn1/a_enum.c \ - asn1/a_gentm.c \ - asn1/a_i2d_fp.c \ - asn1/a_int.c \ - asn1/a_mbstr.c \ - asn1/a_object.c \ - asn1/a_octet.c \ - asn1/a_print.c \ - asn1/a_set.c \ - asn1/a_sign.c \ - asn1/a_strex.c \ - asn1/a_strnid.c \ - asn1/a_time.c \ - asn1/a_type.c \ - asn1/a_utctm.c \ - asn1/a_utf8.c \ - asn1/a_verify.c \ - asn1/ameth_lib.c \ - asn1/asn1_err.c \ - asn1/asn1_gen.c \ - asn1/asn1_lib.c \ - asn1/asn1_par.c \ - asn1/asn_mime.c \ - asn1/asn_moid.c \ - asn1/asn_pack.c \ - asn1/bio_asn1.c \ - asn1/bio_ndef.c \ - asn1/d2i_pr.c \ - asn1/d2i_pu.c \ - asn1/evp_asn1.c \ - asn1/f_enum.c \ - asn1/f_int.c \ - asn1/f_string.c \ - asn1/i2d_pr.c \ - asn1/i2d_pu.c \ - asn1/n_pkey.c \ - asn1/nsseq.c \ - asn1/p5_pbe.c \ - asn1/p5_pbev2.c \ - asn1/p8_pkey.c \ - asn1/t_bitst.c \ - asn1/t_crl.c \ - asn1/t_pkey.c \ - asn1/t_req.c \ - asn1/t_spki.c \ - asn1/t_x509.c \ - asn1/t_x509a.c \ - asn1/tasn_dec.c \ - asn1/tasn_enc.c \ - asn1/tasn_fre.c \ - asn1/tasn_new.c \ - asn1/tasn_prn.c \ - asn1/tasn_typ.c \ - asn1/tasn_utl.c \ - asn1/x_algor.c \ - asn1/x_attrib.c \ - asn1/x_bignum.c \ - asn1/x_crl.c \ - asn1/x_exten.c \ - asn1/x_info.c \ - asn1/x_long.c \ - asn1/x_name.c \ - asn1/x_nx509.c \ - asn1/x_pkey.c \ - asn1/x_pubkey.c \ - asn1/x_req.c \ - asn1/x_sig.c \ - asn1/x_spki.c \ - asn1/x_val.c \ - asn1/x_x509.c \ - asn1/x_x509a.c \ - bf/bf_cfb64.c \ - bf/bf_ecb.c \ - bf/bf_enc.c \ - bf/bf_ofb64.c \ - bf/bf_skey.c \ - bio/b_dump.c \ - bio/b_print.c \ - bio/b_sock.c \ - bio/bf_buff.c \ - bio/bf_nbio.c \ - bio/bf_null.c \ - bio/bio_cb.c \ - bio/bio_err.c \ - bio/bio_lib.c \ - bio/bss_acpt.c \ - bio/bss_bio.c \ - bio/bss_conn.c \ - bio/bss_dgram.c \ - bio/bss_fd.c \ - bio/bss_file.c \ - bio/bss_log.c \ - bio/bss_mem.c \ - bio/bss_null.c \ - bio/bss_sock.c \ - bn/bn_add.c \ - bn/bn_asm.c \ - bn/bn_blind.c \ - bn/bn_const.c \ - bn/bn_ctx.c \ - bn/bn_div.c \ - bn/bn_err.c \ - bn/bn_exp.c \ - bn/bn_exp2.c \ - bn/bn_gcd.c \ - bn/bn_gf2m.c \ - bn/bn_kron.c \ - bn/bn_lib.c \ - bn/bn_mod.c \ - bn/bn_mont.c \ - bn/bn_mpi.c \ - bn/bn_mul.c \ - bn/bn_nist.c \ - bn/bn_prime.c \ - bn/bn_print.c \ - bn/bn_rand.c \ - bn/bn_recp.c \ - bn/bn_shift.c \ - bn/bn_sqr.c \ - bn/bn_sqrt.c \ - bn/bn_word.c \ - buffer/buf_err.c \ - buffer/buffer.c \ - comp/c_rle.c \ - comp/c_zlib.c \ - comp/comp_err.c \ - comp/comp_lib.c \ - conf/conf_api.c \ - conf/conf_def.c \ - conf/conf_err.c \ - conf/conf_lib.c \ - conf/conf_mall.c \ - conf/conf_mod.c \ - conf/conf_sap.c \ - des/cbc_cksm.c \ - des/cbc_enc.c \ - des/cfb64ede.c \ - des/cfb64enc.c \ - des/cfb_enc.c \ - des/des_enc.c \ - des/des_old.c \ - des/des_old2.c \ - des/ecb3_enc.c \ - des/ecb_enc.c \ - des/ede_cbcm_enc.c \ - des/enc_read.c \ - des/enc_writ.c \ - des/fcrypt.c \ - des/fcrypt_b.c \ - des/ofb64ede.c \ - des/ofb64enc.c \ - des/ofb_enc.c \ - des/pcbc_enc.c \ - des/qud_cksm.c \ - des/rand_key.c \ - des/read2pwd.c \ - des/rpc_enc.c \ - des/set_key.c \ - des/str2key.c \ - des/xcbc_enc.c \ - dh/dh_ameth.c \ - dh/dh_asn1.c \ - dh/dh_check.c \ - dh/dh_depr.c \ - dh/dh_err.c \ - dh/dh_gen.c \ - dh/dh_key.c \ - dh/dh_lib.c \ - dh/dh_pmeth.c \ - dsa/dsa_ameth.c \ - dsa/dsa_asn1.c \ - dsa/dsa_depr.c \ - dsa/dsa_err.c \ - dsa/dsa_gen.c \ - dsa/dsa_key.c \ - dsa/dsa_lib.c \ - dsa/dsa_ossl.c \ - dsa/dsa_pmeth.c \ - dsa/dsa_prn.c \ - dsa/dsa_sign.c \ - dsa/dsa_vrf.c \ - dso/dso_dl.c \ - dso/dso_dlfcn.c \ - dso/dso_err.c \ - dso/dso_lib.c \ - dso/dso_null.c \ - dso/dso_openssl.c \ - ec/ec2_mult.c \ - ec/ec2_smpl.c \ - ec/ec_ameth.c \ - ec/ec_asn1.c \ - ec/ec_check.c \ - ec/ec_curve.c \ - ec/ec_cvt.c \ - ec/ec_err.c \ - ec/ec_key.c \ - ec/ec_lib.c \ - ec/ec_mult.c \ - ec/ec_pmeth.c \ - ec/ec_print.c \ - ec/eck_prn.c \ - ec/ecp_mont.c \ - ec/ecp_nist.c \ - ec/ecp_smpl.c \ - ecdh/ech_err.c \ - ecdh/ech_key.c \ - ecdh/ech_lib.c \ - ecdh/ech_ossl.c \ - ecdsa/ecs_asn1.c \ - ecdsa/ecs_err.c \ - ecdsa/ecs_lib.c \ - ecdsa/ecs_ossl.c \ - ecdsa/ecs_sign.c \ - ecdsa/ecs_vrf.c \ - err/err.c \ - err/err_all.c \ - err/err_prn.c \ - evp/bio_b64.c \ - evp/bio_enc.c \ - evp/bio_md.c \ - evp/bio_ok.c \ - evp/c_all.c \ - evp/c_allc.c \ - evp/c_alld.c \ - evp/digest.c \ - evp/e_aes.c \ - evp/e_bf.c \ - evp/e_des.c \ - evp/e_des3.c \ - evp/e_null.c \ - evp/e_old.c \ - evp/e_rc2.c \ - evp/e_rc4.c \ - evp/e_rc5.c \ - evp/e_xcbc_d.c \ - evp/encode.c \ - evp/evp_acnf.c \ - evp/evp_enc.c \ - evp/evp_err.c \ - evp/evp_key.c \ - evp/evp_lib.c \ - evp/evp_pbe.c \ - evp/evp_pkey.c \ - evp/m_dss.c \ - evp/m_dss1.c \ - evp/m_ecdsa.c \ - evp/m_md4.c \ - evp/m_md5.c \ - evp/m_mdc2.c \ - evp/m_null.c \ - evp/m_ripemd.c \ - evp/m_sha1.c \ - evp/m_sigver.c \ - evp/m_wp.c \ - evp/names.c \ - evp/p5_crpt.c \ - evp/p5_crpt2.c \ - evp/p_dec.c \ - evp/p_enc.c \ - evp/p_lib.c \ - evp/p_open.c \ - evp/p_seal.c \ - evp/p_sign.c \ - evp/p_verify.c \ - evp/pmeth_fn.c \ - evp/pmeth_gn.c \ - evp/pmeth_lib.c \ - hmac/hm_ameth.c \ - hmac/hm_pmeth.c \ - hmac/hmac.c \ - krb5/krb5_asn.c \ - lhash/lh_stats.c \ - lhash/lhash.c \ - md4/md4_dgst.c \ - md4/md4_one.c \ - md5/md5_dgst.c \ - md5/md5_one.c \ - modes/cbc128.c \ - modes/cfb128.c \ - modes/ctr128.c \ - modes/ofb128.c \ - objects/o_names.c \ - objects/obj_dat.c \ - objects/obj_err.c \ - objects/obj_lib.c \ - objects/obj_xref.c \ - ocsp/ocsp_asn.c \ - ocsp/ocsp_cl.c \ - ocsp/ocsp_err.c \ - ocsp/ocsp_ext.c \ - ocsp/ocsp_ht.c \ - ocsp/ocsp_lib.c \ - ocsp/ocsp_prn.c \ - ocsp/ocsp_srv.c \ - ocsp/ocsp_vfy.c \ - pem/pem_all.c \ - pem/pem_err.c \ - pem/pem_info.c \ - pem/pem_lib.c \ - pem/pem_oth.c \ - pem/pem_pk8.c \ - pem/pem_pkey.c \ - pem/pem_seal.c \ - pem/pem_sign.c \ - pem/pem_x509.c \ - pem/pem_xaux.c \ - pem/pvkfmt.c \ - pkcs12/p12_add.c \ - pkcs12/p12_asn.c \ - pkcs12/p12_attr.c \ - pkcs12/p12_crpt.c \ - pkcs12/p12_crt.c \ - pkcs12/p12_decr.c \ - pkcs12/p12_init.c \ - pkcs12/p12_key.c \ - pkcs12/p12_kiss.c \ - pkcs12/p12_mutl.c \ - pkcs12/p12_npas.c \ - pkcs12/p12_p8d.c \ - pkcs12/p12_p8e.c \ - pkcs12/p12_utl.c \ - pkcs12/pk12err.c \ - pkcs7/pk7_asn1.c \ - pkcs7/pk7_attr.c \ - pkcs7/pk7_doit.c \ - pkcs7/pk7_lib.c \ - pkcs7/pk7_mime.c \ - pkcs7/pk7_smime.c \ - pkcs7/pkcs7err.c \ - rand/md_rand.c \ - rand/rand_egd.c \ - rand/rand_err.c \ - rand/rand_lib.c \ - rand/rand_unix.c \ - rand/randfile.c \ - rc2/rc2_cbc.c \ - rc2/rc2_ecb.c \ - rc2/rc2_skey.c \ - rc2/rc2cfb64.c \ - rc2/rc2ofb64.c \ - rc4/rc4_enc.c \ - rc4/rc4_skey.c \ - ripemd/rmd_dgst.c \ - ripemd/rmd_one.c \ - rsa/rsa_ameth.c \ - rsa/rsa_asn1.c \ - rsa/rsa_chk.c \ - rsa/rsa_eay.c \ - rsa/rsa_err.c \ - rsa/rsa_gen.c \ - rsa/rsa_lib.c \ - rsa/rsa_none.c \ - rsa/rsa_null.c \ - rsa/rsa_oaep.c \ - rsa/rsa_pk1.c \ - rsa/rsa_pmeth.c \ - rsa/rsa_prn.c \ - rsa/rsa_pss.c \ - rsa/rsa_saos.c \ - rsa/rsa_sign.c \ - rsa/rsa_ssl.c \ - rsa/rsa_x931.c \ - sha/sha1_one.c \ - sha/sha1dgst.c \ - sha/sha256.c \ - sha/sha512.c \ - sha/sha_dgst.c \ - stack/stack.c \ - ts/ts_err.c \ - txt_db/txt_db.c \ - ui/ui_compat.c \ - ui/ui_err.c \ - ui/ui_lib.c \ - ui/ui_openssl.c \ - ui/ui_util.c \ - x509/by_dir.c \ - x509/by_file.c \ - x509/x509_att.c \ - x509/x509_cmp.c \ - x509/x509_d2.c \ - x509/x509_def.c \ - x509/x509_err.c \ - x509/x509_ext.c \ - x509/x509_lu.c \ - x509/x509_obj.c \ - x509/x509_r2x.c \ - x509/x509_req.c \ - x509/x509_set.c \ - x509/x509_trs.c \ - x509/x509_txt.c \ - x509/x509_v3.c \ - x509/x509_vfy.c \ - x509/x509_vpm.c \ - x509/x509cset.c \ - x509/x509name.c \ - x509/x509rset.c \ - x509/x509spki.c \ - x509/x509type.c \ - x509/x_all.c \ - x509v3/pcy_cache.c \ - x509v3/pcy_data.c \ - x509v3/pcy_lib.c \ - x509v3/pcy_map.c \ - x509v3/pcy_node.c \ - x509v3/pcy_tree.c \ - x509v3/v3_akey.c \ - x509v3/v3_akeya.c \ - x509v3/v3_alt.c \ - x509v3/v3_bcons.c \ - x509v3/v3_bitst.c \ - x509v3/v3_conf.c \ - x509v3/v3_cpols.c \ - x509v3/v3_crld.c \ - x509v3/v3_enum.c \ - x509v3/v3_extku.c \ - x509v3/v3_genn.c \ - x509v3/v3_ia5.c \ - x509v3/v3_info.c \ - x509v3/v3_int.c \ - x509v3/v3_lib.c \ - x509v3/v3_ncons.c \ - x509v3/v3_ocsp.c \ - x509v3/v3_pci.c \ - x509v3/v3_pcia.c \ - x509v3/v3_pcons.c \ - x509v3/v3_pku.c \ - x509v3/v3_pmaps.c \ - x509v3/v3_prn.c \ - x509v3/v3_purp.c \ - x509v3/v3_skey.c \ - x509v3/v3_sxnet.c \ - x509v3/v3_utl.c \ - x509v3/v3err.c - -local_c_includes := \ - openssl \ - openssl/crypto/asn1 \ - openssl/crypto/evp \ - openssl/include \ - openssl/include/openssl \ - zlib - -local_c_flags := -DNO_WINDOWS_BRAINDEATH - -####################################### -# target static library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif - -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -LOCAL_C_INCLUDES += $(local_c_includes) -ifeq ($(TARGET_ARCH),arm) - LOCAL_SRC_FILES += $(arm_src_files) - LOCAL_CFLAGS += $(arm_cflags) -else - LOCAL_SRC_FILES += $(non_arm_src_files) -endif -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto_static -include $(BUILD_STATIC_LIBRARY) - -####################################### -# target shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -#ifneq ($(TARGET_ARCH),x86) -#LOCAL_NDK_VERSION := 5 -#LOCAL_SDK_VERSION := 9 -## Use the NDK prebuilt libz and libdl. -LOCAL_LDFLAGS += -lz -ldl -#else -#LOCAL_SHARED_LIBRARIES += libz libdl -#endif - -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -LOCAL_C_INCLUDES += $(local_c_includes) -ifeq ($(TARGET_ARCH),arm) - LOCAL_SRC_FILES += $(arm_src_files) - LOCAL_CFLAGS += $(arm_cflags) -else - LOCAL_SRC_FILES += $(non_arm_src_files) -endif -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto -include $(BUILD_SHARED_LIBRARY) - -####################################### -# Host shared library -#include $(CLEAR_VARS) -##include $(LOCAL_PATH)/../android-config.mk -#LOCAL_SRC_FILES += $(local_src_files) -#LOCAL_CFLAGS += $(local_c_flags) -DPURIFY -#LOCAL_C_INCLUDES += $(local_c_includes) -#LOCAL_SRC_FILES += $(non_arm_src_files) -#LOCAL_STATIC_LIBRARIES += libz -#LOCAL_LDLIBS += -ldl -#LOCAL_MODULE_TAGS := optional -#LOCAL_MODULE:= libcrypto -#include $(BUILD_HOST_SHARED_LIBRARY) - -######################################## -# host static library, which is used by some SDK tools. - -#include $(CLEAR_VARS) -#include $(LOCAL_PATH)/../android-config.mk -#LOCAL_SRC_FILES += $(local_src_files) -#LOCAL_CFLAGS += $(local_c_flags) -DPURIFY -#LOCAL_C_INCLUDES += $(local_c_includes) -#LOCAL_SRC_FILES += $(non_arm_src_files) -#LOCAL_STATIC_LIBRARIES += libz -#LOCAL_LDLIBS += -ldl -#LOCAL_MODULE_TAGS := optional -#LOCAL_MODULE:= libcrypto_static -#include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/app/openssl/crypto/LPdir_win32.c b/app/openssl/crypto/LPdir_win32.c new file mode 100644 index 00000000..e39872da --- /dev/null +++ b/app/openssl/crypto/LPdir_win32.c @@ -0,0 +1,30 @@ +/* $LP: LPlib/source/LPdir_win32.c,v 1.3 2004/08/26 13:36:05 _cvs_levitte Exp $ */ +/* + * Copyright (c) 2004, Richard Levitte + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define LP_SYS_WIN32 +#define LP_MULTIBYTE_AVAILABLE +#include "LPdir_win.c" diff --git a/app/openssl/crypto/aes/aes.h b/app/openssl/crypto/aes/aes.h index d2c99730..031abf01 100644 --- a/app/openssl/crypto/aes/aes.h +++ b/app/openssl/crypto/aes/aes.h @@ -90,6 +90,11 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits, int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + void AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key); void AES_decrypt(const unsigned char *in, unsigned char *out, diff --git a/app/openssl/crypto/aes/aes_core.c b/app/openssl/crypto/aes/aes_core.c index a7ec54f4..8f5210ac 100644 --- a/app/openssl/crypto/aes/aes_core.c +++ b/app/openssl/crypto/aes/aes_core.c @@ -625,7 +625,7 @@ static const u32 rcon[] = { /** * Expand the cipher key into the encryption key schedule. */ -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, +int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) { u32 *rk; @@ -726,7 +726,7 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits, /** * Expand the cipher key into the decryption key schedule. */ -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, +int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) { u32 *rk; @@ -734,7 +734,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, u32 temp; /* first, start with an encryption schedule */ - status = AES_set_encrypt_key(userKey, bits, key); + status = private_AES_set_encrypt_key(userKey, bits, key); if (status < 0) return status; @@ -1201,7 +1201,7 @@ static const u32 rcon[] = { /** * Expand the cipher key into the encryption key schedule. */ -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, +int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) { u32 *rk; int i = 0; @@ -1301,7 +1301,7 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits, /** * Expand the cipher key into the decryption key schedule. */ -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, +int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) { u32 *rk; @@ -1309,7 +1309,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, u32 temp; /* first, start with an encryption schedule */ - status = AES_set_encrypt_key(userKey, bits, key); + status = private_AES_set_encrypt_key(userKey, bits, key); if (status < 0) return status; diff --git a/app/openssl/crypto/aes/aes_misc.c b/app/openssl/crypto/aes/aes_misc.c index 4fead1b4..f083488e 100644 --- a/app/openssl/crypto/aes/aes_misc.c +++ b/app/openssl/crypto/aes/aes_misc.c @@ -50,6 +50,7 @@ */ #include +#include #include #include "aes_locl.h" @@ -62,3 +63,23 @@ const char *AES_options(void) { return "aes(partial)"; #endif } + +/* FIPS wrapper functions to block low level AES calls in FIPS mode */ + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) + { +#ifdef OPENSSL_FIPS + fips_cipher_abort(AES); +#endif + return private_AES_set_encrypt_key(userKey, bits, key); + } + +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) + { +#ifdef OPENSSL_FIPS + fips_cipher_abort(AES); +#endif + return private_AES_set_decrypt_key(userKey, bits, key); + } diff --git a/app/openssl/crypto/aes/asm/aes-586.S b/app/openssl/crypto/aes/asm/aes-586.S new file mode 100644 index 00000000..20c0238f --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-586.S @@ -0,0 +1,3239 @@ +.file "aes-586.s" +.text +.type _x86_AES_encrypt_compact,@function +.align 16 +_x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %ecx,%ebp + xorl %edi,%esi + xorl %esi,%ecx + roll $24,%ecx + xorl %esi,%ecx + rorl $16,%ebp + xorl %ebp,%ecx + rorl $8,%ebp + xorl %ebp,%ecx + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %edx,%ebp + xorl %edi,%esi + xorl %esi,%edx + roll $24,%edx + xorl %esi,%edx + rorl $16,%ebp + xorl %ebp,%edx + rorl $8,%ebp + xorl %ebp,%edx + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %eax,%ebp + xorl %edi,%esi + xorl %esi,%eax + roll $24,%eax + xorl %esi,%eax + rorl $16,%ebp + xorl %ebp,%eax + rorl $8,%ebp + xorl %ebp,%eax + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %ebx,%ebp + xorl %edi,%esi + xorl %esi,%ebx + roll $24,%ebx + xorl %esi,%ebx + rorl $16,%ebp + xorl %ebp,%ebx + rorl $8,%ebp + xorl %ebp,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact +.type _sse_AES_encrypt_compact,@function +.align 16 +_sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + pshufw $13,%mm0,%mm2 + movzbl %ah,%edx + movzbl -128(%ebp,%edx,1),%edx + shll $8,%edx + shrl $16,%eax + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + pshufw $8,%mm4,%mm6 + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movd %ecx,%mm0 + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + movd %mm2,%eax + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + movd %mm6,%ebx + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm1 + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%ecx + shrl $16,%ebx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + shrl $16,%eax + punpckldq %mm1,%mm0 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + andl $255,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $16,%eax + orl %eax,%edx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm4 + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + orl %ebx,%edx + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L001loop +.align 16 +.L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact +.type _x86_AES_encrypt,@function +.align 16 +_x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.size _x86_AES_encrypt,.-_x86_AES_encrypt +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +AES_encrypt: +.L_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal _GLOBAL_OFFSET_TABLE_+[.-.L004pic_point](%ebp),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + leal .LAES_Te-.L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_encrypt,.-.L_AES_encrypt_begin +.type _x86_AES_decrypt_compact,@function +.align 16 +_x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + roll $24,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $16,%ebx + xorl %ebp,%ecx + roll $8,%ebp + xorl %eax,%ecx + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + roll $24,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $16,%ecx + xorl %ebp,%edx + roll $8,%ebp + xorl %ebx,%edx + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + roll $24,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $16,%edx + xorl %ebp,%eax + roll $8,%ebp + xorl %ecx,%eax + xorl %edx,%eax + xorl %ebp,%eax + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + roll $24,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $16,%edx + xorl %ebp,%ebx + roll $8,%ebp + xorl %ecx,%ebx + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact +.type _sse_AES_decrypt_compact,@function +.align 16 +_sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L007loop: + pshufw $12,%mm0,%mm1 + movd %mm1,%eax + pshufw $9,%mm4,%mm5 + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + movd %mm5,%ebx + movzbl %ah,%edx + movzbl -128(%ebp,%edx,1),%edx + shll $8,%edx + pshufw $6,%mm0,%mm2 + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + shrl $16,%eax + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + pshufw $3,%mm4,%mm6 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm0 + movzbl %al,%esi + movd %mm2,%eax + movzbl -128(%ebp,%esi,1),%ecx + shll $16,%ecx + movzbl %bl,%esi + movd %mm6,%ebx + movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%edx + movd %edx,%mm1 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%edx + shll $8,%edx + movzbl %bh,%esi + shrl $16,%eax + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + punpckldq %mm1,%mm0 + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + orl %ebx,%edx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%edx + movd %edx,%mm4 + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + orl %eax,%ecx + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L007loop +.align 16 +.L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact +.type _x86_AES_decrypt,@function +.align 16 +_x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.size _x86_AES_decrypt,.-_x86_AES_decrypt +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +AES_decrypt: +.L_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L010pic_point +.L010pic_point: + popl %ebp + leal _GLOBAL_OFFSET_TABLE_+[.-.L010pic_point](%ebp),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + leal .LAES_Td-.L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_decrypt,.-.L_AES_decrypt_begin +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: +.L_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L012drop_out + call .L013pic_point +.L013pic_point: + popl %ebp + leal _GLOBAL_OFFSET_TABLE_+[.-.L013pic_point](%ebp),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + cmpl $0,40(%esp) + leal .LAES_Te-.L013pic_point(%ebp),%ebp + jne .L014picked_te + leal .LAES_Td-.LAES_Te(%ebp),%ebp +.L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb .L015slow_way + testl $15,%ecx + jnz .L015slow_way + btl $28,(%eax) + jc .L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb .L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp .L017tbl_ok +.align 4 +.L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 4 +.L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb .L018do_copy + cmpl $3852,%ebx + jb .L019skip_copy +.align 4 +.L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +.L019skip_copy: + movl $16,%edi +.align 4 +.L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz .L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je .L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 16 +.L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L023skip_ezero: + movl 28(%esp),%esp + popfl +.L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L021fast_decrypt: + cmpl 36(%esp),%esi + je .L024fast_dec_in_place + movl %edi,52(%esp) +.align 4 +.align 16 +.L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L026fast_dec_out +.align 16 +.L024fast_dec_in_place: +.L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L027fast_dec_in_place_loop +.align 4 +.L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je .L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb .L030slow_enc_tail + btl $25,52(%esp) + jnc .L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 16 +.L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L032slow_enc_loop_sse + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L033slow_enc_loop_x86 + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L034enc_in_place +.align 4 +.long 2767451785 + jmp .L035enc_skip_in_place +.L034enc_in_place: + leal (%edi,%ecx,1),%edi +.L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp .L033slow_enc_loop_x86 +.align 16 +.L029slow_decrypt: + btl $25,52(%esp) + jnc .L036slow_dec_loop_x86 +.align 4 +.L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc .L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz .L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz .L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin +.type _x86_AES_set_encrypt_key,@function +.align 16 +_x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz .L040badpointer + testl $-1,%edi + jz .L040badpointer + call .L041pic_point +.L041pic_point: + popl %ebp + leal .LAES_Te-.L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je .L04210rounds + cmpl $192,%ecx + je .L04312rounds + cmpl $256,%ecx + je .L04414rounds + movl $-2,%eax + jmp .L045exit +.L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp .L04610shortcut +.align 4 +.L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +.L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl .L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp .L045exit +.L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp .L04812shortcut +.align 4 +.L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +.L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je .L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp .L04912loop +.L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp .L045exit +.L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp .L05114shortcut +.align 4 +.L05214loop: + movl 28(%edi),%edx +.L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je .L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp .L05214loop +.L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp .L045exit +.L040badpointer: + movl $-1,%eax +.L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function +.align 16 +private_AES_set_encrypt_key: +.L_private_AES_set_encrypt_key_begin: + call _x86_AES_set_encrypt_key + ret +.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function +.align 16 +private_AES_set_decrypt_key: +.L_private_AES_set_decrypt_key_begin: + call _x86_AES_set_encrypt_key + cmpl $0,%eax + je .L054proceed + ret +.L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 4 +.L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne .L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 4 +.L056permute: + addl $16,%edi + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb .L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/aes/asm/aes-586.pl b/app/openssl/crypto/aes/asm/aes-586.pl old mode 100755 new mode 100644 index aab40e6f..51b500dd --- a/app/openssl/crypto/aes/asm/aes-586.pl +++ b/app/openssl/crypto/aes/asm/aes-586.pl @@ -39,13 +39,13 @@ # but exhibits up to 10% improvement on other cores. # # Second version is "monolithic" replacement for aes_core.c, which in -# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key. +# addition to AES_[de|en]crypt implements private_AES_set_[de|en]cryption_key. # This made it possible to implement little-endian variant of the # algorithm without modifying the base C code. Motivating factor for # the undertaken effort was that it appeared that in tight IA-32 # register window little-endian flavor could achieve slightly higher # Instruction Level Parallelism, and it indeed resulted in up to 15% -# better performance on most recent µ-archs... +# better performance on most recent µ-archs... # # Third version adds AES_cbc_encrypt implementation, which resulted in # up to 40% performance imrovement of CBC benchmark results. 40% was @@ -223,7 +223,7 @@ sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } } $speed_limit=512; # chunks smaller than $speed_limit are # processed with compact routine in CBC mode $small_footprint=1; # $small_footprint=1 code is ~5% slower [on - # recent µ-archs], but ~5 times smaller! + # recent µ-archs], but ~5 times smaller! # I favor compact code to minimize cache # contention and in hope to "collect" 5% back # in real-life applications... @@ -562,7 +562,7 @@ sub enctransform() # Performance is not actually extraordinary in comparison to pure # x86 code. In particular encrypt performance is virtually the same. # Decrypt performance on the other hand is 15-20% better on newer -# µ-archs [but we're thankful for *any* improvement here], and ~50% +# µ-archs [but we're thankful for *any* improvement here], and ~50% # better on PIII:-) And additionally on the pros side this code # eliminates redundant references to stack and thus relieves/ # minimizes the pressure on the memory bus. @@ -2854,12 +2854,12 @@ sub enckey() &set_label("exit"); &function_end("_x86_AES_set_encrypt_key"); -# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, +# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) -&function_begin_B("AES_set_encrypt_key"); +&function_begin_B("private_AES_set_encrypt_key"); &call ("_x86_AES_set_encrypt_key"); &ret (); -&function_end_B("AES_set_encrypt_key"); +&function_end_B("private_AES_set_encrypt_key"); sub deckey() { my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_; @@ -2916,9 +2916,9 @@ sub deckey() &mov (&DWP(4*$i,$key),$tp1); } -# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, +# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) -&function_begin_B("AES_set_decrypt_key"); +&function_begin_B("private_AES_set_decrypt_key"); &call ("_x86_AES_set_encrypt_key"); &cmp ("eax",0); &je (&label("proceed")); @@ -2974,7 +2974,7 @@ sub deckey() &jb (&label("permute")); &xor ("eax","eax"); # return success -&function_end("AES_set_decrypt_key"); +&function_end("private_AES_set_decrypt_key"); &asciz("AES for x86, CRYPTOGAMS by "); &asm_finish(); diff --git a/app/openssl/crypto/aes/asm/aes-armv4.S b/app/openssl/crypto/aes/asm/aes-armv4.S new file mode 120000 index 00000000..88959108 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-armv4.S @@ -0,0 +1 @@ +aes-armv4.s \ No newline at end of file diff --git a/app/openssl/crypto/aes/asm/aes-armv4.pl b/app/openssl/crypto/aes/asm/aes-armv4.pl index c51ee1fb..86b86c4a 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.pl +++ b/app/openssl/crypto/aes/asm/aes-armv4.pl @@ -27,6 +27,11 @@ # Rescheduling for dual-issue pipeline resulted in 12% improvement on # Cortex A8 core and ~25 cycles per byte processed with 128-bit key. +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~21.5 cycles per byte. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -46,6 +51,7 @@ $key="r11"; $rounds="r12"; $code=<<___; +#include "arm_arch.h" .text .code 32 @@ -166,7 +172,7 @@ AES_encrypt: mov $rounds,r0 @ inp mov $key,r2 sub $tbl,r3,#AES_encrypt-AES_Te @ Te - +#if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... ldrb $t2,[$rounds,#1] @@ -195,10 +201,33 @@ AES_encrypt: orr $s3,$s3,$t1,lsl#8 orr $s3,$s3,$t2,lsl#16 orr $s3,$s3,$t3,lsl#24 - +#else + ldr $s0,[$rounds,#0] + ldr $s1,[$rounds,#4] + ldr $s2,[$rounds,#8] + ldr $s3,[$rounds,#12] +#ifdef __ARMEL__ + rev $s0,$s0 + rev $s1,$s1 + rev $s2,$s2 + rev $s3,$s3 +#endif +#endif bl _armv4_AES_encrypt ldr $rounds,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev $s0,$s0 + rev $s1,$s1 + rev $s2,$s2 + rev $s3,$s3 +#endif + str $s0,[$rounds,#0] + str $s1,[$rounds,#4] + str $s2,[$rounds,#8] + str $s3,[$rounds,#12] +#else mov $t1,$s0,lsr#24 @ write output in endian-neutral mov $t2,$s0,lsr#16 @ manner... mov $t3,$s0,lsr#8 @@ -227,11 +256,15 @@ AES_encrypt: strb $t2,[$rounds,#13] strb $t3,[$rounds,#14] strb $s3,[$rounds,#15] - +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size AES_encrypt,.-AES_encrypt .type _armv4_AES_encrypt,%function @@ -271,11 +304,11 @@ _armv4_AES_encrypt: and $i2,lr,$s2,lsr#16 @ i1 eor $t3,$t3,$i3,ror#8 and $i3,lr,$s2 - eor $s1,$s1,$t1,ror#24 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] + eor $s1,$s1,$t1,ror#24 + ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] mov $s2,$s2,lsr#24 - ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] eor $s0,$s0,$i1,ror#16 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] @@ -284,16 +317,16 @@ _armv4_AES_encrypt: and $i2,lr,$s3,lsr#8 @ i1 eor $t3,$t3,$i3,ror#16 and $i3,lr,$s3,lsr#16 @ i2 - eor $s2,$s2,$t2,ror#16 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] + eor $s2,$s2,$t2,ror#16 + ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] mov $s3,$s3,lsr#24 - ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] eor $s0,$s0,$i1,ror#24 - ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] - eor $s1,$s1,$i2,ror#16 ldr $i1,[$key],#16 + eor $s1,$s1,$i2,ror#16 + ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] eor $s2,$s2,$i3,ror#8 ldr $t1,[$key,#-12] eor $s3,$s3,$t3,ror#8 @@ -333,11 +366,11 @@ _armv4_AES_encrypt: and $i2,lr,$s2,lsr#16 @ i1 eor $t3,$i3,$t3,lsl#8 and $i3,lr,$s2 - eor $s1,$t1,$s1,lsl#24 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] + eor $s1,$t1,$s1,lsl#24 + ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] mov $s2,$s2,lsr#24 - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] eor $s0,$i1,$s0,lsl#8 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] @@ -346,15 +379,15 @@ _armv4_AES_encrypt: and $i2,lr,$s3,lsr#8 @ i1 eor $t3,$i3,$t3,lsl#8 and $i3,lr,$s3,lsr#16 @ i2 - eor $s2,$t2,$s2,lsl#24 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] + eor $s2,$t2,$s2,lsl#24 + ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] mov $s3,$s3,lsr#24 - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] eor $s0,$i1,$s0,lsl#8 - ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] ldr $i1,[$key,#0] + ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] eor $s1,$s1,$i2,lsl#8 ldr $t1,[$key,#4] eor $s2,$s2,$i3,lsl#16 @@ -371,10 +404,11 @@ _armv4_AES_encrypt: ldr pc,[sp],#4 @ pop and return .size _armv4_AES_encrypt,.-_armv4_AES_encrypt -.global AES_set_encrypt_key -.type AES_set_encrypt_key,%function +.global private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,%function .align 5 -AES_set_encrypt_key: +private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: sub r3,pc,#8 @ AES_set_encrypt_key teq r0,#0 moveq r0,#-1 @@ -392,12 +426,13 @@ AES_set_encrypt_key: bne .Labrt .Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 + sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 mov $rounds,r0 @ inp mov lr,r1 @ bits mov $key,r2 @ key +#if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... ldrb $t2,[$rounds,#1] @@ -430,6 +465,22 @@ AES_set_encrypt_key: orr $s3,$s3,$t3,lsl#24 str $s2,[$key,#-8] str $s3,[$key,#-4] +#else + ldr $s0,[$rounds,#0] + ldr $s1,[$rounds,#4] + ldr $s2,[$rounds,#8] + ldr $s3,[$rounds,#12] +#ifdef __ARMEL__ + rev $s0,$s0 + rev $s1,$s1 + rev $s2,$s2 + rev $s3,$s3 +#endif + str $s0,[$key],#16 + str $s1,[$key,#-12] + str $s2,[$key,#-8] + str $s3,[$key,#-4] +#endif teq lr,#128 bne .Lnot128 @@ -466,6 +517,7 @@ AES_set_encrypt_key: b .Ldone .Lnot128: +#if __ARM_ARCH__<7 ldrb $i2,[$rounds,#19] ldrb $t1,[$rounds,#18] ldrb $t2,[$rounds,#17] @@ -482,6 +534,16 @@ AES_set_encrypt_key: str $i2,[$key],#8 orr $i3,$i3,$t3,lsl#24 str $i3,[$key,#-4] +#else + ldr $i2,[$rounds,#16] + ldr $i3,[$rounds,#20] +#ifdef __ARMEL__ + rev $i2,$i2 + rev $i3,$i3 +#endif + str $i2,[$key],#8 + str $i3,[$key,#-4] +#endif teq lr,#192 bne .Lnot192 @@ -526,6 +588,7 @@ AES_set_encrypt_key: b .L192_loop .Lnot192: +#if __ARM_ARCH__<7 ldrb $i2,[$rounds,#27] ldrb $t1,[$rounds,#26] ldrb $t2,[$rounds,#25] @@ -542,6 +605,16 @@ AES_set_encrypt_key: str $i2,[$key],#8 orr $i3,$i3,$t3,lsl#24 str $i3,[$key,#-4] +#else + ldr $i2,[$rounds,#24] + ldr $i3,[$rounds,#28] +#ifdef __ARMEL__ + rev $i2,$i2 + rev $i3,$i3 +#endif + str $i2,[$key],#8 + str $i3,[$key,#-4] +#endif mov $rounds,#14 str $rounds,[$key,#240-32] @@ -606,14 +679,14 @@ AES_set_encrypt_key: .Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) -.size AES_set_encrypt_key,.-AES_set_encrypt_key +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key -.global AES_set_decrypt_key -.type AES_set_decrypt_key,%function +.global private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,%function .align 5 -AES_set_decrypt_key: +private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr - bl AES_set_encrypt_key + bl _armv4_AES_set_encrypt_key teq r0,#0 ldrne lr,[sp],#4 @ pop lr bne .Labrt @@ -692,11 +765,15 @@ $code.=<<___; bne .Lmix mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) -.size AES_set_decrypt_key,.-AES_set_decrypt_key +#endif +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .type AES_Td,%object .align 5 @@ -811,7 +888,7 @@ AES_decrypt: mov $rounds,r0 @ inp mov $key,r2 sub $tbl,r3,#AES_decrypt-AES_Td @ Td - +#if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... ldrb $t2,[$rounds,#1] @@ -840,10 +917,33 @@ AES_decrypt: orr $s3,$s3,$t1,lsl#8 orr $s3,$s3,$t2,lsl#16 orr $s3,$s3,$t3,lsl#24 - +#else + ldr $s0,[$rounds,#0] + ldr $s1,[$rounds,#4] + ldr $s2,[$rounds,#8] + ldr $s3,[$rounds,#12] +#ifdef __ARMEL__ + rev $s0,$s0 + rev $s1,$s1 + rev $s2,$s2 + rev $s3,$s3 +#endif +#endif bl _armv4_AES_decrypt ldr $rounds,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev $s0,$s0 + rev $s1,$s1 + rev $s2,$s2 + rev $s3,$s3 +#endif + str $s0,[$rounds,#0] + str $s1,[$rounds,#4] + str $s2,[$rounds,#8] + str $s3,[$rounds,#12] +#else mov $t1,$s0,lsr#24 @ write output in endian-neutral mov $t2,$s0,lsr#16 @ manner... mov $t3,$s0,lsr#8 @@ -872,11 +972,15 @@ AES_decrypt: strb $t2,[$rounds,#13] strb $t3,[$rounds,#14] strb $s3,[$rounds,#15] - +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size AES_decrypt,.-AES_decrypt .type _armv4_AES_decrypt,%function @@ -916,11 +1020,11 @@ _armv4_AES_decrypt: and $i2,lr,$s2 @ i1 eor $t3,$i3,$t3,ror#8 and $i3,lr,$s2,lsr#16 - eor $s1,$s1,$t1,ror#8 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] + eor $s1,$s1,$t1,ror#8 + ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] mov $s2,$s2,lsr#24 - ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] eor $s0,$s0,$i1,ror#16 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] @@ -929,22 +1033,22 @@ _armv4_AES_decrypt: and $i2,lr,$s3,lsr#8 @ i1 eor $t3,$i3,$t3,ror#8 and $i3,lr,$s3 @ i2 - eor $s2,$s2,$t2,ror#8 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] + eor $s2,$s2,$t2,ror#8 + ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] mov $s3,$s3,lsr#24 - ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] eor $s0,$s0,$i1,ror#8 - ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] + ldr $i1,[$key],#16 eor $s1,$s1,$i2,ror#16 + ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] eor $s2,$s2,$i3,ror#24 - ldr $i1,[$key],#16 - eor $s3,$s3,$t3,ror#8 ldr $t1,[$key,#-12] - ldr $t2,[$key,#-8] eor $s0,$s0,$i1 + ldr $t2,[$key,#-8] + eor $s3,$s3,$t3,ror#8 ldr $t3,[$key,#-4] and $i1,lr,$s0,lsr#16 eor $s1,$s1,$t1 @@ -985,11 +1089,11 @@ _armv4_AES_decrypt: and $i1,lr,$s2,lsr#8 @ i0 eor $t2,$t2,$i2,lsl#8 and $i2,lr,$s2 @ i1 - eor $t3,$t3,$i3,lsl#8 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] + eor $t3,$t3,$i3,lsl#8 + ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 - ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] @@ -997,11 +1101,11 @@ _armv4_AES_decrypt: and $i1,lr,$s3,lsr#16 @ i0 eor $s2,$t2,$s2,lsl#16 and $i2,lr,$s3,lsr#8 @ i1 - eor $t3,$t3,$i3,lsl#16 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] + eor $t3,$t3,$i3,lsl#16 + ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 - ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 diff --git a/app/openssl/crypto/aes/asm/aes-armv4.s b/app/openssl/crypto/aes/asm/aes-armv4.s index 27c681c7..2697d4ce 100644 --- a/app/openssl/crypto/aes/asm/aes-armv4.s +++ b/app/openssl/crypto/aes/asm/aes-armv4.s @@ -1,3 +1,4 @@ +#include "arm_arch.h" .text .code 32 @@ -118,7 +119,7 @@ AES_encrypt: mov r12,r0 @ inp mov r11,r2 sub r10,r3,#AES_encrypt-AES_Te @ Te - +#if __ARM_ARCH__<7 ldrb r0,[r12,#3] @ load input data in endian-neutral ldrb r4,[r12,#2] @ manner... ldrb r5,[r12,#1] @@ -147,10 +148,33 @@ AES_encrypt: orr r3,r3,r4,lsl#8 orr r3,r3,r5,lsl#16 orr r3,r3,r6,lsl#24 - +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif bl _armv4_AES_encrypt ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else mov r4,r0,lsr#24 @ write output in endian-neutral mov r5,r0,lsr#16 @ manner... mov r6,r0,lsr#8 @@ -179,11 +203,15 @@ AES_encrypt: strb r5,[r12,#13] strb r6,[r12,#14] strb r3,[r12,#15] - +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size AES_encrypt,.-AES_encrypt .type _armv4_AES_encrypt,%function @@ -223,11 +251,11 @@ _armv4_AES_encrypt: and r8,lr,r2,lsr#16 @ i1 eor r6,r6,r9,ror#8 and r9,lr,r2 - eor r1,r1,r4,ror#24 ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] mov r2,r2,lsr#24 - ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] eor r0,r0,r7,ror#16 ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] @@ -236,16 +264,16 @@ _armv4_AES_encrypt: and r8,lr,r3,lsr#8 @ i1 eor r6,r6,r9,ror#16 and r9,lr,r3,lsr#16 @ i2 - eor r2,r2,r5,ror#16 ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] mov r3,r3,lsr#24 - ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] eor r0,r0,r7,ror#24 - ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] - eor r1,r1,r8,ror#16 ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] eor r2,r2,r9,ror#8 ldr r4,[r11,#-12] eor r3,r3,r6,ror#8 @@ -285,11 +313,11 @@ _armv4_AES_encrypt: and r8,lr,r2,lsr#16 @ i1 eor r6,r9,r6,lsl#8 and r9,lr,r2 - eor r1,r4,r1,lsl#24 ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] mov r2,r2,lsr#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] eor r0,r7,r0,lsl#8 ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] @@ -298,15 +326,15 @@ _armv4_AES_encrypt: and r8,lr,r3,lsr#8 @ i1 eor r6,r9,r6,lsl#8 and r9,lr,r3,lsr#16 @ i2 - eor r2,r5,r2,lsl#24 ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] mov r3,r3,lsr#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] eor r0,r7,r0,lsl#8 - ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] eor r1,r1,r8,lsl#8 ldr r4,[r11,#4] eor r2,r2,r9,lsl#16 @@ -323,10 +351,11 @@ _armv4_AES_encrypt: ldr pc,[sp],#4 @ pop and return .size _armv4_AES_encrypt,.-_armv4_AES_encrypt -.global AES_set_encrypt_key -.type AES_set_encrypt_key,%function +.global private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,%function .align 5 -AES_set_encrypt_key: +private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: sub r3,pc,#8 @ AES_set_encrypt_key teq r0,#0 moveq r0,#-1 @@ -344,12 +373,13 @@ AES_set_encrypt_key: bne .Labrt .Lok: stmdb sp!,{r4-r12,lr} - sub r10,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 mov r12,r0 @ inp mov lr,r1 @ bits mov r11,r2 @ key +#if __ARM_ARCH__<7 ldrb r0,[r12,#3] @ load input data in endian-neutral ldrb r4,[r12,#2] @ manner... ldrb r5,[r12,#1] @@ -382,6 +412,22 @@ AES_set_encrypt_key: orr r3,r3,r6,lsl#24 str r2,[r11,#-8] str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif teq lr,#128 bne .Lnot128 @@ -418,6 +464,7 @@ AES_set_encrypt_key: b .Ldone .Lnot128: +#if __ARM_ARCH__<7 ldrb r8,[r12,#19] ldrb r4,[r12,#18] ldrb r5,[r12,#17] @@ -434,6 +481,16 @@ AES_set_encrypt_key: str r8,[r11],#8 orr r9,r9,r6,lsl#24 str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif teq lr,#192 bne .Lnot192 @@ -478,6 +535,7 @@ AES_set_encrypt_key: b .L192_loop .Lnot192: +#if __ARM_ARCH__<7 ldrb r8,[r12,#27] ldrb r4,[r12,#26] ldrb r5,[r12,#25] @@ -494,6 +552,16 @@ AES_set_encrypt_key: str r8,[r11],#8 orr r9,r9,r6,lsl#24 str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif mov r12,#14 str r12,[r11,#240-32] @@ -558,14 +626,14 @@ AES_set_encrypt_key: .Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) -.size AES_set_encrypt_key,.-AES_set_encrypt_key +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key -.global AES_set_decrypt_key -.type AES_set_decrypt_key,%function +.global private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,%function .align 5 -AES_set_decrypt_key: +private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr - bl AES_set_encrypt_key + bl _armv4_AES_set_encrypt_key teq r0,#0 ldrne lr,[sp],#4 @ pop lr bne .Labrt @@ -639,11 +707,15 @@ AES_set_decrypt_key: bne .Lmix mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) -.size AES_set_decrypt_key,.-AES_set_decrypt_key +#endif +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .type AES_Td,%object .align 5 @@ -758,7 +830,7 @@ AES_decrypt: mov r12,r0 @ inp mov r11,r2 sub r10,r3,#AES_decrypt-AES_Td @ Td - +#if __ARM_ARCH__<7 ldrb r0,[r12,#3] @ load input data in endian-neutral ldrb r4,[r12,#2] @ manner... ldrb r5,[r12,#1] @@ -787,10 +859,33 @@ AES_decrypt: orr r3,r3,r4,lsl#8 orr r3,r3,r5,lsl#16 orr r3,r3,r6,lsl#24 - +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif bl _armv4_AES_decrypt ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else mov r4,r0,lsr#24 @ write output in endian-neutral mov r5,r0,lsr#16 @ manner... mov r6,r0,lsr#8 @@ -819,11 +914,15 @@ AES_decrypt: strb r5,[r12,#13] strb r6,[r12,#14] strb r3,[r12,#15] - +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size AES_decrypt,.-AES_decrypt .type _armv4_AES_decrypt,%function @@ -863,11 +962,11 @@ _armv4_AES_decrypt: and r8,lr,r2 @ i1 eor r6,r9,r6,ror#8 and r9,lr,r2,lsr#16 - eor r1,r1,r4,ror#8 ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] mov r2,r2,lsr#24 - ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] eor r0,r0,r7,ror#16 ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] @@ -876,22 +975,22 @@ _armv4_AES_decrypt: and r8,lr,r3,lsr#8 @ i1 eor r6,r9,r6,ror#8 and r9,lr,r3 @ i2 - eor r2,r2,r5,ror#8 ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] mov r3,r3,lsr#24 - ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] eor r0,r0,r7,ror#8 - ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + ldr r7,[r11],#16 eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] eor r2,r2,r9,ror#24 - ldr r7,[r11],#16 - eor r3,r3,r6,ror#8 ldr r4,[r11,#-12] - ldr r5,[r11,#-8] eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 ldr r6,[r11,#-4] and r7,lr,r0,lsr#16 eor r1,r1,r4 @@ -932,11 +1031,11 @@ _armv4_AES_decrypt: and r7,lr,r2,lsr#8 @ i0 eor r5,r5,r8,lsl#8 and r8,lr,r2 @ i1 - eor r6,r6,r9,lsl#8 ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] and r9,lr,r2,lsr#16 - ldrb r8,[r10,r8] @ Td4[s2>>0] ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] eor r0,r0,r7,lsl#8 ldrb r9,[r10,r9] @ Td4[s2>>16] @@ -944,11 +1043,11 @@ _armv4_AES_decrypt: and r7,lr,r3,lsr#16 @ i0 eor r2,r5,r2,lsl#16 and r8,lr,r3,lsr#8 @ i1 - eor r6,r6,r9,lsl#16 ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] and r9,lr,r3 @ i2 - ldrb r8,[r10,r8] @ Td4[s3>>8] ldrb r9,[r10,r9] @ Td4[s3>>0] ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] eor r0,r0,r7,lsl#16 diff --git a/app/openssl/crypto/aes/asm/aes-mips.S b/app/openssl/crypto/aes/asm/aes-mips.S new file mode 100644 index 00000000..f5750bf8 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-mips.S @@ -0,0 +1,1337 @@ +.text +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +#if !defined(__vxworks) || defined(__pic__) +.option pic2 +#endif +.set noat +.align 5 +.ent _mips_AES_encrypt +_mips_AES_encrypt: + .frame $29,0,$31 + .set reorder + lw $12,0($6) + lw $13,4($6) + lw $14,8($6) + lw $15,12($6) + lw $30,240($6) + add $3,$6,16 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + sub $30,1 + srl $1,$9,6 +.Loop_enc: + srl $2,$10,6 + srl $24,$11,6 + srl $25,$8,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $12,2($1) # Te1[s1>>16] + lwl $13,2($2) # Te1[s2>>16] + lwl $14,2($24) # Te1[s3>>16] + lwl $15,2($25) # Te1[s0>>16] + lwr $12,3($1) # Te1[s1>>16] + lwr $13,3($2) # Te1[s2>>16] + lwr $14,3($24) # Te1[s3>>16] + lwr $15,3($25) # Te1[s0>>16] + + srl $1,$10,14 + srl $2,$11,14 + srl $24,$8,14 + srl $25,$9,14 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $16,1($1) # Te2[s2>>8] + lwl $17,1($2) # Te2[s3>>8] + lwl $18,1($24) # Te2[s0>>8] + lwl $19,1($25) # Te2[s1>>8] + lwr $16,2($1) # Te2[s2>>8] + lwr $17,2($2) # Te2[s3>>8] + lwr $18,2($24) # Te2[s0>>8] + lwr $19,2($25) # Te2[s1>>8] + + srl $1,$11,22 + srl $2,$8,22 + srl $24,$9,22 + srl $25,$10,22 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $20,0($1) # Te3[s3] + lwl $21,0($2) # Te3[s0] + lwl $22,0($24) # Te3[s1] + lwl $23,0($25) # Te3[s2] + lwr $20,1($1) # Te3[s3] + lwr $21,1($2) # Te3[s0] + lwr $22,1($24) # Te3[s1] + lwr $23,1($25) # Te3[s2] + + sll $1,$8,2 + sll $2,$9,2 + sll $24,$10,2 + sll $25,$11,2 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + lw $16,0($1) # Te0[s0>>24] + lw $17,0($2) # Te0[s1>>24] + lw $18,0($24) # Te0[s2>>24] + lw $19,0($25) # Te0[s3>>24] + + lw $8,0($3) + lw $9,4($3) + lw $10,8($3) + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_enc + srl $1,$9,6 + + .set reorder + srl $2,$10,6 + srl $24,$11,6 + srl $25,$8,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $12,2($1) # Te4[s1>>16] + lbu $13,2($2) # Te4[s2>>16] + lbu $14,2($24) # Te4[s3>>16] + lbu $15,2($25) # Te4[s0>>16] + + srl $1,$10,14 + srl $2,$11,14 + srl $24,$8,14 + srl $25,$9,14 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $16,2($1) # Te4[s2>>8] + lbu $17,2($2) # Te4[s3>>8] + lbu $18,2($24) # Te4[s0>>8] + lbu $19,2($25) # Te4[s1>>8] + + sll $1,$8,2 + sll $2,$9,2 + sll $24,$10,2 + sll $25,$11,2 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,2($1) # Te4[s0>>24] + lbu $21,2($2) # Te4[s1>>24] + lbu $22,2($24) # Te4[s2>>24] + lbu $23,2($25) # Te4[s3>>24] + + srl $1,$11,22 + srl $2,$8,22 + srl $24,$9,22 + srl $25,$10,22 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + sll $16,$16,16 + sll $17,$17,16 + sll $18,$18,16 + sll $19,$19,16 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $16,2($1) # Te4[s3] + lbu $17,2($2) # Te4[s0] + lbu $18,2($24) # Te4[s1] + lbu $19,2($25) # Te4[s2] + + #sll $20,$20,0 + #sll $21,$21,0 + #sll $22,$22,0 + #sll $23,$23,0 + + lw $8,0($3) + lw $9,4($3) + lw $10,8($3) + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + sll $16,$16,24 + sll $17,$17,24 + sll $18,$18,24 + sll $19,$19,24 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + jr $31 +.end _mips_AES_encrypt + +.align 5 +.globl AES_encrypt +.ent AES_encrypt +AES_encrypt: + .frame $29,64,$31 + .mask 3237937152,-4 + .set noreorder + .cpload $25 + sub $29,64 + sw $31,64-1*4($29) + sw $30,64-2*4($29) + sw $23,64-3*4($29) + sw $22,64-4*4($29) + sw $21,64-5*4($29) + sw $20,64-6*4($29) + sw $19,64-7*4($29) + sw $18,64-8*4($29) + sw $17,64-9*4($29) + sw $16,64-10*4($29) + .set reorder + la $7,AES_Te # PIC-ified 'load address' + + lwl $8,0+3($4) + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + + bal _mips_AES_encrypt + + swr $8,0+0($5) + swr $9,4+0($5) + swr $10,8+0($5) + swr $11,12+0($5) + swl $8,0+3($5) + swl $9,4+3($5) + swl $10,8+3($5) + swl $11,12+3($5) + + .set noreorder + lw $31,64-1*4($29) + lw $30,64-2*4($29) + lw $23,64-3*4($29) + lw $22,64-4*4($29) + lw $21,64-5*4($29) + lw $20,64-6*4($29) + lw $19,64-7*4($29) + lw $18,64-8*4($29) + lw $17,64-9*4($29) + lw $16,64-10*4($29) + jr $31 + add $29,64 +.end AES_encrypt +.align 5 +.ent _mips_AES_decrypt +_mips_AES_decrypt: + .frame $29,0,$31 + .set reorder + lw $12,0($6) + lw $13,4($6) + lw $14,8($6) + lw $15,12($6) + lw $30,240($6) + add $3,$6,16 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + sub $30,1 + srl $1,$11,6 +.Loop_dec: + srl $2,$8,6 + srl $24,$9,6 + srl $25,$10,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $12,2($1) # Td1[s3>>16] + lwl $13,2($2) # Td1[s0>>16] + lwl $14,2($24) # Td1[s1>>16] + lwl $15,2($25) # Td1[s2>>16] + lwr $12,3($1) # Td1[s3>>16] + lwr $13,3($2) # Td1[s0>>16] + lwr $14,3($24) # Td1[s1>>16] + lwr $15,3($25) # Td1[s2>>16] + + srl $1,$10,14 + srl $2,$11,14 + srl $24,$8,14 + srl $25,$9,14 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $16,1($1) # Td2[s2>>8] + lwl $17,1($2) # Td2[s3>>8] + lwl $18,1($24) # Td2[s0>>8] + lwl $19,1($25) # Td2[s1>>8] + lwr $16,2($1) # Td2[s2>>8] + lwr $17,2($2) # Td2[s3>>8] + lwr $18,2($24) # Td2[s0>>8] + lwr $19,2($25) # Td2[s1>>8] + + srl $1,$9,22 + srl $2,$10,22 + srl $24,$11,22 + srl $25,$8,22 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $20,0($1) # Td3[s1] + lwl $21,0($2) # Td3[s2] + lwl $22,0($24) # Td3[s3] + lwl $23,0($25) # Td3[s0] + lwr $20,1($1) # Td3[s1] + lwr $21,1($2) # Td3[s2] + lwr $22,1($24) # Td3[s3] + lwr $23,1($25) # Td3[s0] + + sll $1,$8,2 + sll $2,$9,2 + sll $24,$10,2 + sll $25,$11,2 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + + lw $16,0($1) # Td0[s0>>24] + lw $17,0($2) # Td0[s1>>24] + lw $18,0($24) # Td0[s2>>24] + lw $19,0($25) # Td0[s3>>24] + + lw $8,0($3) + lw $9,4($3) + lw $10,8($3) + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_dec + srl $1,$11,6 + + .set reorder + lw $16,1024($7) # prefetch Td4 + lw $17,1024+32($7) + lw $18,1024+64($7) + lw $19,1024+96($7) + lw $20,1024+128($7) + lw $21,1024+160($7) + lw $22,1024+192($7) + lw $23,1024+224($7) + + srl $1,$11,8 + srl $2,$8,8 + srl $24,$9,8 + srl $25,$10,8 + and $1,0xff + and $2,0xff + and $24,0xff + and $25,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $12,1024($1) # Td4[s3>>16] + lbu $13,1024($2) # Td4[s0>>16] + lbu $14,1024($24) # Td4[s1>>16] + lbu $15,1024($25) # Td4[s2>>16] + + srl $1,$10,16 + srl $2,$11,16 + srl $24,$8,16 + srl $25,$9,16 + and $1,0xff + and $2,0xff + and $24,0xff + and $25,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $16,1024($1) # Td4[s2>>8] + lbu $17,1024($2) # Td4[s3>>8] + lbu $18,1024($24) # Td4[s0>>8] + lbu $19,1024($25) # Td4[s1>>8] + + and $1,$8,0xff + and $2,$9,0xff + and $24,$10,0xff + and $25,$11,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,1024($1) # Td4[s0>>24] + lbu $21,1024($2) # Td4[s1>>24] + lbu $22,1024($24) # Td4[s2>>24] + lbu $23,1024($25) # Td4[s3>>24] + + srl $1,$9,24 + srl $2,$10,24 + srl $24,$11,24 + srl $25,$8,24 + + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + sll $16,$16,16 + sll $17,$17,16 + sll $18,$18,16 + sll $19,$19,16 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $16,1024($1) # Td4[s1] + lbu $17,1024($2) # Td4[s2] + lbu $18,1024($24) # Td4[s3] + lbu $19,1024($25) # Td4[s0] + + #sll $20,$20,0 + #sll $21,$21,0 + #sll $22,$22,0 + #sll $23,$23,0 + + lw $8,0($3) + lw $9,4($3) + lw $10,8($3) + lw $11,12($3) + + sll $16,$16,24 + sll $17,$17,24 + sll $18,$18,24 + sll $19,$19,24 + + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + jr $31 +.end _mips_AES_decrypt + +.align 5 +.globl AES_decrypt +.ent AES_decrypt +AES_decrypt: + .frame $29,64,$31 + .mask 3237937152,-4 + .set noreorder + .cpload $25 + sub $29,64 + sw $31,64-1*4($29) + sw $30,64-2*4($29) + sw $23,64-3*4($29) + sw $22,64-4*4($29) + sw $21,64-5*4($29) + sw $20,64-6*4($29) + sw $19,64-7*4($29) + sw $18,64-8*4($29) + sw $17,64-9*4($29) + sw $16,64-10*4($29) + .set reorder + la $7,AES_Td # PIC-ified 'load address' + + lwl $8,0+3($4) + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + + bal _mips_AES_decrypt + + swr $8,0+0($5) + swr $9,4+0($5) + swr $10,8+0($5) + swr $11,12+0($5) + swl $8,0+3($5) + swl $9,4+3($5) + swl $10,8+3($5) + swl $11,12+3($5) + + .set noreorder + lw $31,64-1*4($29) + lw $30,64-2*4($29) + lw $23,64-3*4($29) + lw $22,64-4*4($29) + lw $21,64-5*4($29) + lw $20,64-6*4($29) + lw $19,64-7*4($29) + lw $18,64-8*4($29) + lw $17,64-9*4($29) + lw $16,64-10*4($29) + jr $31 + add $29,64 +.end AES_decrypt +.align 5 +.ent _mips_AES_set_encrypt_key +_mips_AES_set_encrypt_key: + .frame $29,0,$31 + .set noreorder + beqz $4,.Lekey_done + li $2,-1 + beqz $6,.Lekey_done + add $3,$7,1024+256 + + .set reorder + lwl $8,0+3($4) # load 128 bits + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + li $1,128 + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + .set noreorder + beq $5,$1,.L128bits + li $30,10 + + .set reorder + lwl $12,16+3($4) # load 192 bits + lwl $13,20+3($4) + li $1,192 + lwr $12,16+0($4) + lwr $13,20+0($4) + .set noreorder + beq $5,$1,.L192bits + li $30,8 + + .set reorder + lwl $14,24+3($4) # load 256 bits + lwl $15,28+3($4) + li $1,256 + lwr $14,24+0($4) + lwr $15,28+0($4) + .set noreorder + beq $5,$1,.L256bits + li $30,7 + + b .Lekey_done + li $2,-2 + +.align 4 +.L128bits: + .set reorder + srl $1,$11,16 + srl $2,$11,8 + and $1,0xff + and $2,0xff + and $24,$11,0xff + srl $25,$11,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,1024($1) + lbu $2,1024($2) + lbu $24,1024($24) + lbu $25,1024($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sub $30,1 + add $6,16 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + + .set noreorder + bnez $30,.L128bits + add $3,4 + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + li $30,10 + sw $11,12($6) + li $2,0 + sw $30,80($6) + b .Lekey_done + sub $6,10*16 + +.align 4 +.L192bits: + .set reorder + srl $1,$13,16 + srl $2,$13,8 + and $1,0xff + and $2,0xff + and $24,$13,0xff + srl $25,$13,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,1024($1) + lbu $2,1024($2) + lbu $24,1024($24) + lbu $25,1024($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sw $12,16($6) + sw $13,20($6) + sub $30,1 + add $6,24 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + xor $12,$11 + xor $13,$12 + + .set noreorder + bnez $30,.L192bits + add $3,4 + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + li $30,12 + sw $11,12($6) + li $2,0 + sw $30,48($6) + b .Lekey_done + sub $6,12*16 + +.align 4 +.L256bits: + .set reorder + srl $1,$15,16 + srl $2,$15,8 + and $1,0xff + and $2,0xff + and $24,$15,0xff + srl $25,$15,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,1024($1) + lbu $2,1024($2) + lbu $24,1024($24) + lbu $25,1024($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sw $12,16($6) + sw $13,20($6) + sw $14,24($6) + sw $15,28($6) + sub $30,1 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + beqz $30,.L256bits_done + + srl $1,$11,24 + srl $2,$11,16 + srl $24,$11,8 + and $25,$11,0xff + and $2,0xff + and $24,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,1024($1) + lbu $2,1024($2) + lbu $24,1024($24) + lbu $25,1024($25) + sll $1,24 + sll $2,16 + sll $24,8 + + xor $12,$1 + xor $12,$2 + xor $12,$24 + xor $12,$25 + + xor $13,$12 + xor $14,$13 + xor $15,$14 + + add $6,32 + .set noreorder + b .L256bits + add $3,4 + +.L256bits_done: + sw $8,32($6) + sw $9,36($6) + sw $10,40($6) + li $30,14 + sw $11,44($6) + li $2,0 + sw $30,48($6) + sub $6,12*16 + +.Lekey_done: + jr $31 + nop +.end _mips_AES_set_encrypt_key + +.globl private_AES_set_encrypt_key +.ent private_AES_set_encrypt_key +private_AES_set_encrypt_key: + .frame $29,32,$31 + .mask 3221225472,-4 + .set noreorder + .cpload $25 + sub $29,32 + sw $31,32-1*4($29) + sw $30,32-2*4($29) + .set reorder + la $7,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + .set noreorder + move $4,$2 + lw $31,32-1*4($29) + lw $30,32-2*4($29) + jr $31 + add $29,32 +.end private_AES_set_encrypt_key +.align 5 +.globl private_AES_set_decrypt_key +.ent private_AES_set_decrypt_key +private_AES_set_decrypt_key: + .frame $29,32,$31 + .mask 3221225472,-4 + .set noreorder + .cpload $25 + sub $29,32 + sw $31,32-1*4($29) + sw $30,32-2*4($29) + .set reorder + la $7,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + bltz $2,.Ldkey_done + + sll $1,$30,4 + add $4,$6,0 + add $5,$6,$1 +.align 4 +.Lswap: + lw $8,0($4) + lw $9,4($4) + lw $10,8($4) + lw $11,12($4) + lw $12,0($5) + lw $13,4($5) + lw $14,8($5) + lw $15,12($5) + sw $8,0($5) + sw $9,4($5) + sw $10,8($5) + sw $11,12($5) + add $4,16 + sub $5,16 + sw $12,-16($4) + sw $13,-12($4) + sw $14,-8($4) + sw $15,-4($4) + bne $4,$5,.Lswap + + lw $8,16($6) # modulo-scheduled + lui $2,0x8080 + sub $30,1 + or $2,0x8080 + sll $30,2 + add $6,16 + lui $25,0x1b1b + nor $24,$0,$2 + or $25,0x1b1b +.align 4 +.Lmix: + and $1,$8,$2 + and $9,$8,$24 + srl $10,$1,7 + addu $9,$9 # tp2<<1 + subu $1,$10 + and $1,$25 + xor $9,$1 + + and $1,$9,$2 + and $10,$9,$24 + srl $11,$1,7 + addu $10,$10 # tp4<<1 + subu $1,$11 + and $1,$25 + xor $10,$1 + + and $1,$10,$2 + and $11,$10,$24 + srl $12,$1,7 + addu $11,$11 # tp8<<1 + subu $1,$12 + and $1,$25 + xor $11,$1 + + xor $12,$11,$8 + xor $15,$11,$10 + xor $13,$12,$9 + xor $14,$12,$10 + + sll $8,$14,16 + xor $15,$9 + srl $9,$14,16 + xor $15,$8 + sll $8,$12,8 + xor $15,$9 + srl $9,$12,24 + xor $15,$8 + sll $8,$13,24 + xor $15,$9 + srl $9,$13,8 + xor $15,$8 + lw $8,4($6) # modulo-scheduled + xor $15,$9 + sub $30,1 + sw $15,0($6) + add $6,4 + bnez $30,.Lmix + + li $2,0 +.Ldkey_done: + .set noreorder + move $4,$2 + lw $31,32-1*4($29) + lw $30,32-2*4($29) + jr $31 + add $29,32 +.end private_AES_set_decrypt_key +.rdata +.align 6 +AES_Te: +.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 +.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d +.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd +.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 +.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 +.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d +.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 +.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a +.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d +.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 +.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb +.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b +.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 +.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea +.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 +.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b +.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c +.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a +.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 +.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f +.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 +.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 +.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 +.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f +.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 +.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e +.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 +.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 +.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 +.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d +.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 +.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f +.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e +.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e +.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 +.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb +.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d +.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce +.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e +.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 +.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 +.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c +.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f +.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed +.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 +.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b +.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 +.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a +.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a +.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 +.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 +.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 +.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 +.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 +.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 +.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 +.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe +.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a +.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc +.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 +.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 +.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 +.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a +.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d +.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 +.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f +.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 +.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 +.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 +.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 +.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 +.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 +.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 +.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f +.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e +.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 +.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 +.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c +.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 +.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 +.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 +.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e +.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a +.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 +.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e +.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 +.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 +.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b +.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 +.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 +.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 +.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 +.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa +.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 +.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e +.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 +.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 +.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 +.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 +.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 +.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c +.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 +.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc +.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 +.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 +.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa +.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 +.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 +.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f +.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 +.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 +.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 +.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 +.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 +.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 +.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 +.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 +.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 +.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff +.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a +.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 +.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 +.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 +.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 +.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 +.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 +.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc +.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a + +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon +.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 +.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 +.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 +.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + +.align 6 +AES_Td: +.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 +.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 +.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 +.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 +.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 +.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 +.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 +.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f +.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 +.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 +.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 +.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 +.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 +.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda +.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 +.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 +.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 +.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd +.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 +.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 +.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 +.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 +.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 +.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 +.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 +.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 +.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 +.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a +.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 +.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 +.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 +.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c +.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 +.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 +.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 +.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a +.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 +.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 +.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa +.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 +.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d +.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 +.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 +.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff +.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 +.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 +.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 +.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb +.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 +.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 +.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 +.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e +.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 +.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 +.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 +.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a +.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f +.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e +.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 +.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 +.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 +.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d +.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad +.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 +.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c +.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd +.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc +.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 +.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc +.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 +.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 +.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 +.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 +.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d +.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 +.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 +.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 +.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 +.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a +.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef +.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 +.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 +.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 +.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 +.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d +.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 +.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 +.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 +.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c +.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 +.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 +.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b +.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 +.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 +.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e +.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 +.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce +.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 +.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 +.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 +.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 +.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 +.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 +.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f +.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d +.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf +.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b +.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f +.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d +.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e +.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 +.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 +.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a +.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 +.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 +.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c +.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f +.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf +.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b +.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 +.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e +.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f +.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c +.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 +.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde +.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 +.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 +.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 + +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d diff --git a/app/openssl/crypto/aes/asm/aes-mips.pl b/app/openssl/crypto/aes/asm/aes-mips.pl new file mode 100644 index 00000000..e5239542 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-mips.pl @@ -0,0 +1,1611 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# AES for MIPS + +# October 2010 +# +# Code uses 1K[+256B] S-box and on single-issue core [such as R5000] +# spends ~68 cycles per byte processed with 128-bit key. This is ~16% +# faster than gcc-generated code, which is not very impressive. But +# recall that compressed S-box requires extra processing, namely +# additional rotations. Rotations are implemented with lwl/lwr pairs, +# which is normally used for loading unaligned data. Another cool +# thing about this module is its endian neutrality, which means that +# it processes data without ever changing byte order... + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $PTR_SLL="dsll"; # incidentally works even on n32 + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $PTR_SLL="sll"; + $SZREG=4; +} +$pf = ($flavour =~ /nubi/i) ? $t0 : $t2; +# +# +# +###################################################################### + +$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; + +for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +if (!defined($big_endian)) +{ $big_endian=(unpack('L',pack('N',1))==1); } + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($MSB,$LSB)=(0,3); # automatically converted to little-endian + +$code.=<<___; +.text +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +#if !defined(__vxworks) || defined(__pic__) +.option pic2 +#endif +.set noat +___ + +{{{ +my $FRAMESIZE=16*$SZREG; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; + +my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7); +my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); +my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); +my ($key0,$cnt)=($gp,$fp); + +# instuction ordering is "stolen" from output from MIPSpro assembler +# invoked with -mips3 -O3 arguments... +$code.=<<___; +.align 5 +.ent _mips_AES_encrypt +_mips_AES_encrypt: + .frame $sp,0,$ra + .set reorder + lw $t0,0($key) + lw $t1,4($key) + lw $t2,8($key) + lw $t3,12($key) + lw $cnt,240($key) + $PTR_ADD $key0,$key,16 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + sub $cnt,1 + _xtr $i0,$s1,16-2 +.Loop_enc: + _xtr $i1,$s2,16-2 + _xtr $i2,$s3,16-2 + _xtr $i3,$s0,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t0,3($i0) # Te1[s1>>16] + lwl $t1,3($i1) # Te1[s2>>16] + lwl $t2,3($i2) # Te1[s3>>16] + lwl $t3,3($i3) # Te1[s0>>16] + lwr $t0,2($i0) # Te1[s1>>16] + lwr $t1,2($i1) # Te1[s2>>16] + lwr $t2,2($i2) # Te1[s3>>16] + lwr $t3,2($i3) # Te1[s0>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t4,2($i0) # Te2[s2>>8] + lwl $t5,2($i1) # Te2[s3>>8] + lwl $t6,2($i2) # Te2[s0>>8] + lwl $t7,2($i3) # Te2[s1>>8] + lwr $t4,1($i0) # Te2[s2>>8] + lwr $t5,1($i1) # Te2[s3>>8] + lwr $t6,1($i2) # Te2[s0>>8] + lwr $t7,1($i3) # Te2[s1>>8] + + _xtr $i0,$s3,0-2 + _xtr $i1,$s0,0-2 + _xtr $i2,$s1,0-2 + _xtr $i3,$s2,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t8,1($i0) # Te3[s3] + lwl $t9,1($i1) # Te3[s0] + lwl $t10,1($i2) # Te3[s1] + lwl $t11,1($i3) # Te3[s2] + lwr $t8,0($i0) # Te3[s3] + lwr $t9,0($i1) # Te3[s0] + lwr $t10,0($i2) # Te3[s1] + lwr $t11,0($i3) # Te3[s2] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + lw $t4,0($i0) # Te0[s0>>24] + lw $t5,0($i1) # Te0[s1>>24] + lw $t6,0($i2) # Te0[s2>>24] + lw $t7,0($i3) # Te0[s3>>24] + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_enc + _xtr $i0,$s1,16-2 + + .set reorder + _xtr $i1,$s2,16-2 + _xtr $i2,$s3,16-2 + _xtr $i3,$s0,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t0,2($i0) # Te4[s1>>16] + lbu $t1,2($i1) # Te4[s2>>16] + lbu $t2,2($i2) # Te4[s3>>16] + lbu $t3,2($i3) # Te4[s0>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,2($i0) # Te4[s2>>8] + lbu $t5,2($i1) # Te4[s3>>8] + lbu $t6,2($i2) # Te4[s0>>8] + lbu $t7,2($i3) # Te4[s1>>8] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,2($i0) # Te4[s0>>24] + lbu $t9,2($i1) # Te4[s1>>24] + lbu $t10,2($i2) # Te4[s2>>24] + lbu $t11,2($i3) # Te4[s3>>24] + + _xtr $i0,$s3,0-2 + _xtr $i1,$s0,0-2 + _xtr $i2,$s1,0-2 + _xtr $i3,$s2,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + + _ins $t4,8 + _ins $t5,8 + _ins $t6,8 + _ins $t7,8 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,2($i0) # Te4[s3] + lbu $t5,2($i1) # Te4[s0] + lbu $t6,2($i2) # Te4[s1] + lbu $t7,2($i3) # Te4[s2] + + _ins $t8,24 + _ins $t9,24 + _ins $t10,24 + _ins $t11,24 + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + _ins $t4,0 + _ins $t5,0 + _ins $t6,0 + _ins $t7,0 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + jr $ra +.end _mips_AES_encrypt + +.align 5 +.globl AES_encrypt +.ent AES_encrypt +AES_encrypt: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_encrypt +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + lwl $s0,0+$MSB($inp) + lwl $s1,4+$MSB($inp) + lwl $s2,8+$MSB($inp) + lwl $s3,12+$MSB($inp) + lwr $s0,0+$LSB($inp) + lwr $s1,4+$LSB($inp) + lwr $s2,8+$LSB($inp) + lwr $s3,12+$LSB($inp) + + bal _mips_AES_encrypt + + swr $s0,0+$LSB($out) + swr $s1,4+$LSB($out) + swr $s2,8+$LSB($out) + swr $s3,12+$LSB($out) + swl $s0,0+$MSB($out) + swl $s1,4+$MSB($out) + swl $s2,8+$MSB($out) + swl $s3,12+$MSB($out) + + .set noreorder + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_encrypt +___ + +$code.=<<___; +.align 5 +.ent _mips_AES_decrypt +_mips_AES_decrypt: + .frame $sp,0,$ra + .set reorder + lw $t0,0($key) + lw $t1,4($key) + lw $t2,8($key) + lw $t3,12($key) + lw $cnt,240($key) + $PTR_ADD $key0,$key,16 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + sub $cnt,1 + _xtr $i0,$s3,16-2 +.Loop_dec: + _xtr $i1,$s0,16-2 + _xtr $i2,$s1,16-2 + _xtr $i3,$s2,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t0,3($i0) # Td1[s3>>16] + lwl $t1,3($i1) # Td1[s0>>16] + lwl $t2,3($i2) # Td1[s1>>16] + lwl $t3,3($i3) # Td1[s2>>16] + lwr $t0,2($i0) # Td1[s3>>16] + lwr $t1,2($i1) # Td1[s0>>16] + lwr $t2,2($i2) # Td1[s1>>16] + lwr $t3,2($i3) # Td1[s2>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t4,2($i0) # Td2[s2>>8] + lwl $t5,2($i1) # Td2[s3>>8] + lwl $t6,2($i2) # Td2[s0>>8] + lwl $t7,2($i3) # Td2[s1>>8] + lwr $t4,1($i0) # Td2[s2>>8] + lwr $t5,1($i1) # Td2[s3>>8] + lwr $t6,1($i2) # Td2[s0>>8] + lwr $t7,1($i3) # Td2[s1>>8] + + _xtr $i0,$s1,0-2 + _xtr $i1,$s2,0-2 + _xtr $i2,$s3,0-2 + _xtr $i3,$s0,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t8,1($i0) # Td3[s1] + lwl $t9,1($i1) # Td3[s2] + lwl $t10,1($i2) # Td3[s3] + lwl $t11,1($i3) # Td3[s0] + lwr $t8,0($i0) # Td3[s1] + lwr $t9,0($i1) # Td3[s2] + lwr $t10,0($i2) # Td3[s3] + lwr $t11,0($i3) # Td3[s0] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + + lw $t4,0($i0) # Td0[s0>>24] + lw $t5,0($i1) # Td0[s1>>24] + lw $t6,0($i2) # Td0[s2>>24] + lw $t7,0($i3) # Td0[s3>>24] + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_dec + _xtr $i0,$s3,16-2 + + .set reorder + lw $t4,1024($Tbl) # prefetch Td4 + lw $t5,1024+32($Tbl) + lw $t6,1024+64($Tbl) + lw $t7,1024+96($Tbl) + lw $t8,1024+128($Tbl) + lw $t9,1024+160($Tbl) + lw $t10,1024+192($Tbl) + lw $t11,1024+224($Tbl) + + _xtr $i0,$s3,16 + _xtr $i1,$s0,16 + _xtr $i2,$s1,16 + _xtr $i3,$s2,16 + and $i0,0xff + and $i1,0xff + and $i2,0xff + and $i3,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t0,1024($i0) # Td4[s3>>16] + lbu $t1,1024($i1) # Td4[s0>>16] + lbu $t2,1024($i2) # Td4[s1>>16] + lbu $t3,1024($i3) # Td4[s2>>16] + + _xtr $i0,$s2,8 + _xtr $i1,$s3,8 + _xtr $i2,$s0,8 + _xtr $i3,$s1,8 + and $i0,0xff + and $i1,0xff + and $i2,0xff + and $i3,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,1024($i0) # Td4[s2>>8] + lbu $t5,1024($i1) # Td4[s3>>8] + lbu $t6,1024($i2) # Td4[s0>>8] + lbu $t7,1024($i3) # Td4[s1>>8] + + _xtr $i0,$s0,24 + _xtr $i1,$s1,24 + _xtr $i2,$s2,24 + _xtr $i3,$s3,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,1024($i0) # Td4[s0>>24] + lbu $t9,1024($i1) # Td4[s1>>24] + lbu $t10,1024($i2) # Td4[s2>>24] + lbu $t11,1024($i3) # Td4[s3>>24] + + _xtr $i0,$s1,0 + _xtr $i1,$s2,0 + _xtr $i2,$s3,0 + _xtr $i3,$s0,0 + + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + + _ins $t4,8 + _ins $t5,8 + _ins $t6,8 + _ins $t7,8 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,1024($i0) # Td4[s1] + lbu $t5,1024($i1) # Td4[s2] + lbu $t6,1024($i2) # Td4[s3] + lbu $t7,1024($i3) # Td4[s0] + + _ins $t8,24 + _ins $t9,24 + _ins $t10,24 + _ins $t11,24 + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + _ins $t4,0 + _ins $t5,0 + _ins $t6,0 + _ins $t7,0 + + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + jr $ra +.end _mips_AES_decrypt + +.align 5 +.globl AES_decrypt +.ent AES_decrypt +AES_decrypt: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_decrypt +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Td # PIC-ified 'load address' + + lwl $s0,0+$MSB($inp) + lwl $s1,4+$MSB($inp) + lwl $s2,8+$MSB($inp) + lwl $s3,12+$MSB($inp) + lwr $s0,0+$LSB($inp) + lwr $s1,4+$LSB($inp) + lwr $s2,8+$LSB($inp) + lwr $s3,12+$LSB($inp) + + bal _mips_AES_decrypt + + swr $s0,0+$LSB($out) + swr $s1,4+$LSB($out) + swr $s2,8+$LSB($out) + swr $s3,12+$LSB($out) + swl $s0,0+$MSB($out) + swl $s1,4+$MSB($out) + swl $s2,8+$MSB($out) + swl $s3,12+$MSB($out) + + .set noreorder + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_decrypt +___ +}}} + +{{{ +my $FRAMESIZE=8*$SZREG; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; + +my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3); +my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); +my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); +my ($rcon,$cnt)=($gp,$fp); + +$code.=<<___; +.align 5 +.ent _mips_AES_set_encrypt_key +_mips_AES_set_encrypt_key: + .frame $sp,0,$ra + .set noreorder + beqz $inp,.Lekey_done + li $t0,-1 + beqz $key,.Lekey_done + $PTR_ADD $rcon,$Tbl,1024+256 + + .set reorder + lwl $rk0,0+$MSB($inp) # load 128 bits + lwl $rk1,4+$MSB($inp) + lwl $rk2,8+$MSB($inp) + lwl $rk3,12+$MSB($inp) + li $at,128 + lwr $rk0,0+$LSB($inp) + lwr $rk1,4+$LSB($inp) + lwr $rk2,8+$LSB($inp) + lwr $rk3,12+$LSB($inp) + .set noreorder + beq $bits,$at,.L128bits + li $cnt,10 + + .set reorder + lwl $rk4,16+$MSB($inp) # load 192 bits + lwl $rk5,20+$MSB($inp) + li $at,192 + lwr $rk4,16+$LSB($inp) + lwr $rk5,20+$LSB($inp) + .set noreorder + beq $bits,$at,.L192bits + li $cnt,8 + + .set reorder + lwl $rk6,24+$MSB($inp) # load 256 bits + lwl $rk7,28+$MSB($inp) + li $at,256 + lwr $rk6,24+$LSB($inp) + lwr $rk7,28+$LSB($inp) + .set noreorder + beq $bits,$at,.L256bits + li $cnt,7 + + b .Lekey_done + li $t0,-2 + +.align 4 +.L128bits: + .set reorder + srl $i0,$rk3,16 + srl $i1,$rk3,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk3,0xff + srl $i3,$rk3,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sub $cnt,1 + $PTR_ADD $key,16 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + + .set noreorder + bnez $cnt,.L128bits + $PTR_ADD $rcon,4 + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + li $cnt,10 + sw $rk3,12($key) + li $t0,0 + sw $cnt,80($key) + b .Lekey_done + $PTR_SUB $key,10*16 + +.align 4 +.L192bits: + .set reorder + srl $i0,$rk5,16 + srl $i1,$rk5,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk5,0xff + srl $i3,$rk5,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sw $rk4,16($key) + sw $rk5,20($key) + sub $cnt,1 + $PTR_ADD $key,24 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + xor $rk4,$rk3 + xor $rk5,$rk4 + + .set noreorder + bnez $cnt,.L192bits + $PTR_ADD $rcon,4 + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + li $cnt,12 + sw $rk3,12($key) + li $t0,0 + sw $cnt,48($key) + b .Lekey_done + $PTR_SUB $key,12*16 + +.align 4 +.L256bits: + .set reorder + srl $i0,$rk7,16 + srl $i1,$rk7,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk7,0xff + srl $i3,$rk7,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sw $rk4,16($key) + sw $rk5,20($key) + sw $rk6,24($key) + sw $rk7,28($key) + sub $cnt,1 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + beqz $cnt,.L256bits_done + + srl $i0,$rk3,24 + srl $i1,$rk3,16 + srl $i2,$rk3,8 + and $i3,$rk3,0xff + and $i1,0xff + and $i2,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + sll $i0,24 + sll $i1,16 + sll $i2,8 + + xor $rk4,$i0 + xor $rk4,$i1 + xor $rk4,$i2 + xor $rk4,$i3 + + xor $rk5,$rk4 + xor $rk6,$rk5 + xor $rk7,$rk6 + + $PTR_ADD $key,32 + .set noreorder + b .L256bits + $PTR_ADD $rcon,4 + +.L256bits_done: + sw $rk0,32($key) + sw $rk1,36($key) + sw $rk2,40($key) + li $cnt,14 + sw $rk3,44($key) + li $t0,0 + sw $cnt,48($key) + $PTR_SUB $key,12*16 + +.Lekey_done: + jr $ra + nop +.end _mips_AES_set_encrypt_key + +.globl private_AES_set_encrypt_key +.ent private_AES_set_encrypt_key +private_AES_set_encrypt_key: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,private_AES_set_encrypt_key +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + .set noreorder + move $a0,$t0 + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end private_AES_set_encrypt_key +___ + +my ($head,$tail)=($inp,$bits); +my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); +my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); +$code.=<<___; +.align 5 +.globl private_AES_set_decrypt_key +.ent private_AES_set_decrypt_key +private_AES_set_decrypt_key: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,private_AES_set_decrypt_key +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + bltz $t0,.Ldkey_done + + sll $at,$cnt,4 + $PTR_ADD $head,$key,0 + $PTR_ADD $tail,$key,$at +.align 4 +.Lswap: + lw $rk0,0($head) + lw $rk1,4($head) + lw $rk2,8($head) + lw $rk3,12($head) + lw $rk4,0($tail) + lw $rk5,4($tail) + lw $rk6,8($tail) + lw $rk7,12($tail) + sw $rk0,0($tail) + sw $rk1,4($tail) + sw $rk2,8($tail) + sw $rk3,12($tail) + $PTR_ADD $head,16 + $PTR_SUB $tail,16 + sw $rk4,-16($head) + sw $rk5,-12($head) + sw $rk6,-8($head) + sw $rk7,-4($head) + bne $head,$tail,.Lswap + + lw $tp1,16($key) # modulo-scheduled + lui $x80808080,0x8080 + sub $cnt,1 + or $x80808080,0x8080 + sll $cnt,2 + $PTR_ADD $key,16 + lui $x1b1b1b1b,0x1b1b + nor $x7f7f7f7f,$zero,$x80808080 + or $x1b1b1b1b,0x1b1b +.align 4 +.Lmix: + and $m,$tp1,$x80808080 + and $tp2,$tp1,$x7f7f7f7f + srl $tp4,$m,7 + addu $tp2,$tp2 # tp2<<1 + subu $m,$tp4 + and $m,$x1b1b1b1b + xor $tp2,$m + + and $m,$tp2,$x80808080 + and $tp4,$tp2,$x7f7f7f7f + srl $tp8,$m,7 + addu $tp4,$tp4 # tp4<<1 + subu $m,$tp8 + and $m,$x1b1b1b1b + xor $tp4,$m + + and $m,$tp4,$x80808080 + and $tp8,$tp4,$x7f7f7f7f + srl $tp9,$m,7 + addu $tp8,$tp8 # tp8<<1 + subu $m,$tp9 + and $m,$x1b1b1b1b + xor $tp8,$m + + xor $tp9,$tp8,$tp1 + xor $tpe,$tp8,$tp4 + xor $tpb,$tp9,$tp2 + xor $tpd,$tp9,$tp4 + + _ror $tp1,$tpd,16 + xor $tpe,$tp2 + _ror $tp2,$tpd,-16 + xor $tpe,$tp1 + _ror $tp1,$tp9,8 + xor $tpe,$tp2 + _ror $tp2,$tp9,-24 + xor $tpe,$tp1 + _ror $tp1,$tpb,24 + xor $tpe,$tp2 + _ror $tp2,$tpb,-8 + xor $tpe,$tp1 + lw $tp1,4($key) # modulo-scheduled + xor $tpe,$tp2 + sub $cnt,1 + sw $tpe,0($key) + $PTR_ADD $key,4 + bnez $cnt,.Lmix + + li $t0,0 +.Ldkey_done: + .set noreorder + move $a0,$t0 + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end private_AES_set_decrypt_key +___ +}}} + +###################################################################### +# Tables are kept in endian-neutral manner +$code.=<<___; +.rdata +.align 6 +AES_Te: +.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 +.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d +.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd +.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 +.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 +.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d +.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 +.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a +.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d +.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 +.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb +.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b +.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 +.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea +.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 +.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b +.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c +.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a +.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 +.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f +.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 +.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 +.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 +.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f +.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 +.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e +.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 +.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 +.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 +.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d +.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 +.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f +.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e +.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e +.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 +.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb +.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d +.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce +.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e +.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 +.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 +.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c +.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f +.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed +.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 +.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b +.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 +.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a +.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a +.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 +.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 +.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 +.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 +.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 +.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 +.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 +.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe +.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a +.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc +.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 +.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 +.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 +.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a +.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d +.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 +.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f +.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 +.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 +.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 +.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 +.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 +.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 +.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 +.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f +.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e +.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 +.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 +.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c +.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 +.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 +.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 +.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e +.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a +.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 +.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e +.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 +.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 +.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b +.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 +.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 +.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 +.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 +.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa +.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 +.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e +.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 +.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 +.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 +.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 +.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 +.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c +.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 +.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc +.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 +.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 +.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa +.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 +.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 +.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f +.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 +.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 +.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 +.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 +.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 +.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 +.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 +.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 +.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 +.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff +.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a +.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 +.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 +.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 +.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 +.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 +.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 +.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc +.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a + +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon +.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 +.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 +.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 +.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + +.align 6 +AES_Td: +.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 +.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 +.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 +.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 +.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 +.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 +.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 +.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f +.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 +.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 +.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 +.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 +.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 +.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda +.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 +.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 +.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 +.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd +.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 +.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 +.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 +.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 +.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 +.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 +.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 +.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 +.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 +.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a +.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 +.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 +.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 +.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c +.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 +.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 +.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 +.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a +.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 +.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 +.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa +.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 +.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d +.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 +.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 +.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff +.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 +.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 +.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 +.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb +.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 +.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 +.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 +.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e +.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 +.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 +.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 +.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a +.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f +.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e +.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 +.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 +.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 +.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d +.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad +.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 +.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c +.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd +.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc +.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 +.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc +.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 +.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 +.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 +.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 +.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d +.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 +.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 +.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 +.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 +.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a +.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef +.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 +.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 +.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 +.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 +.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d +.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 +.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 +.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 +.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c +.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 +.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 +.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b +.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 +.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 +.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e +.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 +.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce +.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 +.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 +.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 +.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 +.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 +.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 +.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f +.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d +.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf +.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b +.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f +.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d +.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e +.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 +.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 +.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a +.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 +.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 +.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c +.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f +.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf +.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b +.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 +.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e +.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f +.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c +.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 +.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde +.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 +.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 +.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 + +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + # made-up _instructions, _xtr, _ins, _ror and _bias, cope + # with byte order dependencies... + if (/^\s+_/) { + s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/; + + s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/ + sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) + : eval("24-$3"))/e or + s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ + sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) + : eval("24-$3"))/e or + s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/ + sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) + : eval("$3*-1"))/e or + s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ + sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) + : eval("($3-16)&31"))/e; + + s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/ + sprintf("sll\t$1,$2,$3")/e or + s/srl\s+(\$[0-9]+),(\$[0-9]+),0/ + sprintf("and\t$1,$2,0xff")/e or + s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/; + } + + # convert lwl/lwr and swr/swl to little-endian order + if (!$big_endian && /^\s+[sl]w[lr]\s+/) { + s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/ + sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or + s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/ + sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/aes-parisc.pl b/app/openssl/crypto/aes/asm/aes-parisc.pl new file mode 100644 index 00000000..714dcfbb --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-parisc.pl @@ -0,0 +1,1022 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# AES for PA-RISC. +# +# June 2009. +# +# The module is mechanical transliteration of aes-sparcv9.pl, but with +# a twist: S-boxes are compressed even further down to 1K+256B. On +# PA-7100LC performance is ~40% better than gcc 3.2 generated code and +# is about 33 cycles per byte processed with 128-bit key. Newer CPUs +# perform at 16 cycles per byte. It's not faster than code generated +# by vendor compiler, but recall that it has compressed S-boxes, which +# requires extra processing. +# +# Special thanks to polarhome.com for providing HP-UX account. + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; +} else { + $LEVEL ="1.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; +} + +$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker + # [+ argument transfer] +$inp="%r26"; # arg0 +$out="%r25"; # arg1 +$key="%r24"; # arg2 + +($s0,$s1,$s2,$s3) = ("%r1","%r2","%r3","%r4"); +($t0,$t1,$t2,$t3) = ("%r5","%r6","%r7","%r8"); + +($acc0, $acc1, $acc2, $acc3, $acc4, $acc5, $acc6, $acc7, + $acc8, $acc9,$acc10,$acc11,$acc12,$acc13,$acc14,$acc15) = +("%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16", +"%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r26"); + +$tbl="%r28"; +$rounds="%r29"; + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT AES_encrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR + .ALIGN 64 +AES_encrypt + .PROC + .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) + $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) + $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) + $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) + $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) + $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) + $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) + $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) + + blr %r0,$tbl + ldi 3,$t0 +L\$enc_pic + andcm $tbl,$t0,$tbl + ldo L\$AES_Te-L\$enc_pic($tbl),$tbl + + and $inp,$t0,$t0 + sub $inp,$t0,$inp + ldw 0($inp),$s0 + ldw 4($inp),$s1 + ldw 8($inp),$s2 + comib,= 0,$t0,L\$enc_inp_aligned + ldw 12($inp),$s3 + + sh3addl $t0,%r0,$t0 + subi 32,$t0,$t0 + mtctl $t0,%cr11 + ldw 16($inp),$t1 + vshd $s0,$s1,$s0 + vshd $s1,$s2,$s1 + vshd $s2,$s3,$s2 + vshd $s3,$t1,$s3 + +L\$enc_inp_aligned + bl _parisc_AES_encrypt,%r31 + nop + + extru,<> $out,31,2,%r0 + b L\$enc_out_aligned + nop + + _srm $s0,24,$acc0 + _srm $s0,16,$acc1 + stb $acc0,0($out) + _srm $s0,8,$acc2 + stb $acc1,1($out) + _srm $s1,24,$acc4 + stb $acc2,2($out) + _srm $s1,16,$acc5 + stb $s0,3($out) + _srm $s1,8,$acc6 + stb $acc4,4($out) + _srm $s2,24,$acc0 + stb $acc5,5($out) + _srm $s2,16,$acc1 + stb $acc6,6($out) + _srm $s2,8,$acc2 + stb $s1,7($out) + _srm $s3,24,$acc4 + stb $acc0,8($out) + _srm $s3,16,$acc5 + stb $acc1,9($out) + _srm $s3,8,$acc6 + stb $acc2,10($out) + stb $s2,11($out) + stb $acc4,12($out) + stb $acc5,13($out) + stb $acc6,14($out) + b L\$enc_done + stb $s3,15($out) + +L\$enc_out_aligned + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) + +L\$enc_done + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 + $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 + $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 + $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 + $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 + $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 + $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 + $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + + .ALIGN 16 +_parisc_AES_encrypt + .PROC + .CALLINFO MILLICODE + .ENTRY + ldw 240($key),$rounds + ldw 0($key),$t0 + ldw 4($key),$t1 + ldw 8($key),$t2 + _srm $rounds,1,$rounds + xor $t0,$s0,$s0 + ldw 12($key),$t3 + _srm $s0,24,$acc0 + xor $t1,$s1,$s1 + ldw 16($key),$t0 + _srm $s1,16,$acc1 + xor $t2,$s2,$s2 + ldw 20($key),$t1 + xor $t3,$s3,$s3 + ldw 24($key),$t2 + ldw 28($key),$t3 +L\$enc_loop + _srm $s2,8,$acc2 + ldwx,s $acc0($tbl),$acc0 + _srm $s3,0,$acc3 + ldwx,s $acc1($tbl),$acc1 + _srm $s1,24,$acc4 + ldwx,s $acc2($tbl),$acc2 + _srm $s2,16,$acc5 + ldwx,s $acc3($tbl),$acc3 + _srm $s3,8,$acc6 + ldwx,s $acc4($tbl),$acc4 + _srm $s0,0,$acc7 + ldwx,s $acc5($tbl),$acc5 + _srm $s2,24,$acc8 + ldwx,s $acc6($tbl),$acc6 + _srm $s3,16,$acc9 + ldwx,s $acc7($tbl),$acc7 + _srm $s0,8,$acc10 + ldwx,s $acc8($tbl),$acc8 + _srm $s1,0,$acc11 + ldwx,s $acc9($tbl),$acc9 + _srm $s3,24,$acc12 + ldwx,s $acc10($tbl),$acc10 + _srm $s0,16,$acc13 + ldwx,s $acc11($tbl),$acc11 + _srm $s1,8,$acc14 + ldwx,s $acc12($tbl),$acc12 + _srm $s2,0,$acc15 + ldwx,s $acc13($tbl),$acc13 + ldwx,s $acc14($tbl),$acc14 + ldwx,s $acc15($tbl),$acc15 + addib,= -1,$rounds,L\$enc_last + ldo 32($key),$key + + _ror $acc1,8,$acc1 + xor $acc0,$t0,$t0 + ldw 0($key),$s0 + _ror $acc2,16,$acc2 + xor $acc1,$t0,$t0 + ldw 4($key),$s1 + _ror $acc3,24,$acc3 + xor $acc2,$t0,$t0 + ldw 8($key),$s2 + _ror $acc5,8,$acc5 + xor $acc3,$t0,$t0 + ldw 12($key),$s3 + _ror $acc6,16,$acc6 + xor $acc4,$t1,$t1 + _ror $acc7,24,$acc7 + xor $acc5,$t1,$t1 + _ror $acc9,8,$acc9 + xor $acc6,$t1,$t1 + _ror $acc10,16,$acc10 + xor $acc7,$t1,$t1 + _ror $acc11,24,$acc11 + xor $acc8,$t2,$t2 + _ror $acc13,8,$acc13 + xor $acc9,$t2,$t2 + _ror $acc14,16,$acc14 + xor $acc10,$t2,$t2 + _ror $acc15,24,$acc15 + xor $acc11,$t2,$t2 + xor $acc12,$acc14,$acc14 + xor $acc13,$t3,$t3 + _srm $t0,24,$acc0 + xor $acc14,$t3,$t3 + _srm $t1,16,$acc1 + xor $acc15,$t3,$t3 + + _srm $t2,8,$acc2 + ldwx,s $acc0($tbl),$acc0 + _srm $t3,0,$acc3 + ldwx,s $acc1($tbl),$acc1 + _srm $t1,24,$acc4 + ldwx,s $acc2($tbl),$acc2 + _srm $t2,16,$acc5 + ldwx,s $acc3($tbl),$acc3 + _srm $t3,8,$acc6 + ldwx,s $acc4($tbl),$acc4 + _srm $t0,0,$acc7 + ldwx,s $acc5($tbl),$acc5 + _srm $t2,24,$acc8 + ldwx,s $acc6($tbl),$acc6 + _srm $t3,16,$acc9 + ldwx,s $acc7($tbl),$acc7 + _srm $t0,8,$acc10 + ldwx,s $acc8($tbl),$acc8 + _srm $t1,0,$acc11 + ldwx,s $acc9($tbl),$acc9 + _srm $t3,24,$acc12 + ldwx,s $acc10($tbl),$acc10 + _srm $t0,16,$acc13 + ldwx,s $acc11($tbl),$acc11 + _srm $t1,8,$acc14 + ldwx,s $acc12($tbl),$acc12 + _srm $t2,0,$acc15 + ldwx,s $acc13($tbl),$acc13 + _ror $acc1,8,$acc1 + ldwx,s $acc14($tbl),$acc14 + + _ror $acc2,16,$acc2 + xor $acc0,$s0,$s0 + ldwx,s $acc15($tbl),$acc15 + _ror $acc3,24,$acc3 + xor $acc1,$s0,$s0 + ldw 16($key),$t0 + _ror $acc5,8,$acc5 + xor $acc2,$s0,$s0 + ldw 20($key),$t1 + _ror $acc6,16,$acc6 + xor $acc3,$s0,$s0 + ldw 24($key),$t2 + _ror $acc7,24,$acc7 + xor $acc4,$s1,$s1 + ldw 28($key),$t3 + _ror $acc9,8,$acc9 + xor $acc5,$s1,$s1 + ldw 1024+0($tbl),%r0 ; prefetch te4 + _ror $acc10,16,$acc10 + xor $acc6,$s1,$s1 + ldw 1024+32($tbl),%r0 ; prefetch te4 + _ror $acc11,24,$acc11 + xor $acc7,$s1,$s1 + ldw 1024+64($tbl),%r0 ; prefetch te4 + _ror $acc13,8,$acc13 + xor $acc8,$s2,$s2 + ldw 1024+96($tbl),%r0 ; prefetch te4 + _ror $acc14,16,$acc14 + xor $acc9,$s2,$s2 + ldw 1024+128($tbl),%r0 ; prefetch te4 + _ror $acc15,24,$acc15 + xor $acc10,$s2,$s2 + ldw 1024+160($tbl),%r0 ; prefetch te4 + _srm $s0,24,$acc0 + xor $acc11,$s2,$s2 + ldw 1024+192($tbl),%r0 ; prefetch te4 + xor $acc12,$acc14,$acc14 + xor $acc13,$s3,$s3 + ldw 1024+224($tbl),%r0 ; prefetch te4 + _srm $s1,16,$acc1 + xor $acc14,$s3,$s3 + b L\$enc_loop + xor $acc15,$s3,$s3 + + .ALIGN 16 +L\$enc_last + ldo 1024($tbl),$rounds + _ror $acc1,8,$acc1 + xor $acc0,$t0,$t0 + ldw 0($key),$s0 + _ror $acc2,16,$acc2 + xor $acc1,$t0,$t0 + ldw 4($key),$s1 + _ror $acc3,24,$acc3 + xor $acc2,$t0,$t0 + ldw 8($key),$s2 + _ror $acc5,8,$acc5 + xor $acc3,$t0,$t0 + ldw 12($key),$s3 + _ror $acc6,16,$acc6 + xor $acc4,$t1,$t1 + _ror $acc7,24,$acc7 + xor $acc5,$t1,$t1 + _ror $acc9,8,$acc9 + xor $acc6,$t1,$t1 + _ror $acc10,16,$acc10 + xor $acc7,$t1,$t1 + _ror $acc11,24,$acc11 + xor $acc8,$t2,$t2 + _ror $acc13,8,$acc13 + xor $acc9,$t2,$t2 + _ror $acc14,16,$acc14 + xor $acc10,$t2,$t2 + _ror $acc15,24,$acc15 + xor $acc11,$t2,$t2 + xor $acc12,$acc14,$acc14 + xor $acc13,$t3,$t3 + _srm $t0,24,$acc0 + xor $acc14,$t3,$t3 + _srm $t1,16,$acc1 + xor $acc15,$t3,$t3 + + _srm $t2,8,$acc2 + ldbx $acc0($rounds),$acc0 + _srm $t1,24,$acc4 + ldbx $acc1($rounds),$acc1 + _srm $t2,16,$acc5 + _srm $t3,0,$acc3 + ldbx $acc2($rounds),$acc2 + ldbx $acc3($rounds),$acc3 + _srm $t3,8,$acc6 + ldbx $acc4($rounds),$acc4 + _srm $t2,24,$acc8 + ldbx $acc5($rounds),$acc5 + _srm $t3,16,$acc9 + _srm $t0,0,$acc7 + ldbx $acc6($rounds),$acc6 + ldbx $acc7($rounds),$acc7 + _srm $t0,8,$acc10 + ldbx $acc8($rounds),$acc8 + _srm $t3,24,$acc12 + ldbx $acc9($rounds),$acc9 + _srm $t0,16,$acc13 + _srm $t1,0,$acc11 + ldbx $acc10($rounds),$acc10 + _srm $t1,8,$acc14 + ldbx $acc11($rounds),$acc11 + ldbx $acc12($rounds),$acc12 + ldbx $acc13($rounds),$acc13 + _srm $t2,0,$acc15 + ldbx $acc14($rounds),$acc14 + + dep $acc0,7,8,$acc3 + ldbx $acc15($rounds),$acc15 + dep $acc4,7,8,$acc7 + dep $acc1,15,8,$acc3 + dep $acc5,15,8,$acc7 + dep $acc2,23,8,$acc3 + dep $acc6,23,8,$acc7 + xor $acc3,$s0,$s0 + xor $acc7,$s1,$s1 + dep $acc8,7,8,$acc11 + dep $acc12,7,8,$acc15 + dep $acc9,15,8,$acc11 + dep $acc13,15,8,$acc15 + dep $acc10,23,8,$acc11 + dep $acc14,23,8,$acc15 + xor $acc11,$s2,$s2 + + bv (%r31) + .EXIT + xor $acc15,$s3,$s3 + .PROCEND + + .ALIGN 64 +L\$AES_Te + .WORD 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d + .WORD 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 + .WORD 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d + .WORD 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a + .WORD 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 + .WORD 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b + .WORD 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea + .WORD 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b + .WORD 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a + .WORD 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f + .WORD 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 + .WORD 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f + .WORD 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e + .WORD 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 + .WORD 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d + .WORD 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f + .WORD 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e + .WORD 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb + .WORD 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce + .WORD 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 + .WORD 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c + .WORD 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed + .WORD 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b + .WORD 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a + .WORD 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 + .WORD 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 + .WORD 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 + .WORD 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 + .WORD 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a + .WORD 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 + .WORD 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 + .WORD 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d + .WORD 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f + .WORD 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 + .WORD 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 + .WORD 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 + .WORD 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f + .WORD 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 + .WORD 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c + .WORD 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 + .WORD 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e + .WORD 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 + .WORD 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 + .WORD 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b + .WORD 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 + .WORD 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 + .WORD 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 + .WORD 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 + .WORD 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 + .WORD 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 + .WORD 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 + .WORD 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 + .WORD 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa + .WORD 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 + .WORD 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 + .WORD 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 + .WORD 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 + .WORD 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 + .WORD 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 + .WORD 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a + .WORD 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 + .WORD 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 + .WORD 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 + .WORD 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a + .BYTE 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .BYTE 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .BYTE 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .BYTE 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .BYTE 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .BYTE 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .BYTE 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .BYTE 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .BYTE 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .BYTE 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .BYTE 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .BYTE 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .BYTE 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .BYTE 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .BYTE 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .BYTE 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .BYTE 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .BYTE 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .BYTE 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .BYTE 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .BYTE 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .BYTE 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .BYTE 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .BYTE 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .BYTE 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .BYTE 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .BYTE 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .BYTE 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .BYTE 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .BYTE 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .BYTE 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .BYTE 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +___ + +$code.=<<___; + .EXPORT AES_decrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR + .ALIGN 16 +AES_decrypt + .PROC + .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) + $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) + $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) + $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) + $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) + $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) + $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) + $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) + + blr %r0,$tbl + ldi 3,$t0 +L\$dec_pic + andcm $tbl,$t0,$tbl + ldo L\$AES_Td-L\$dec_pic($tbl),$tbl + + and $inp,$t0,$t0 + sub $inp,$t0,$inp + ldw 0($inp),$s0 + ldw 4($inp),$s1 + ldw 8($inp),$s2 + comib,= 0,$t0,L\$dec_inp_aligned + ldw 12($inp),$s3 + + sh3addl $t0,%r0,$t0 + subi 32,$t0,$t0 + mtctl $t0,%cr11 + ldw 16($inp),$t1 + vshd $s0,$s1,$s0 + vshd $s1,$s2,$s1 + vshd $s2,$s3,$s2 + vshd $s3,$t1,$s3 + +L\$dec_inp_aligned + bl _parisc_AES_decrypt,%r31 + nop + + extru,<> $out,31,2,%r0 + b L\$dec_out_aligned + nop + + _srm $s0,24,$acc0 + _srm $s0,16,$acc1 + stb $acc0,0($out) + _srm $s0,8,$acc2 + stb $acc1,1($out) + _srm $s1,24,$acc4 + stb $acc2,2($out) + _srm $s1,16,$acc5 + stb $s0,3($out) + _srm $s1,8,$acc6 + stb $acc4,4($out) + _srm $s2,24,$acc0 + stb $acc5,5($out) + _srm $s2,16,$acc1 + stb $acc6,6($out) + _srm $s2,8,$acc2 + stb $s1,7($out) + _srm $s3,24,$acc4 + stb $acc0,8($out) + _srm $s3,16,$acc5 + stb $acc1,9($out) + _srm $s3,8,$acc6 + stb $acc2,10($out) + stb $s2,11($out) + stb $acc4,12($out) + stb $acc5,13($out) + stb $acc6,14($out) + b L\$dec_done + stb $s3,15($out) + +L\$dec_out_aligned + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) + +L\$dec_done + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 + $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 + $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 + $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 + $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 + $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 + $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 + $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + + .ALIGN 16 +_parisc_AES_decrypt + .PROC + .CALLINFO MILLICODE + .ENTRY + ldw 240($key),$rounds + ldw 0($key),$t0 + ldw 4($key),$t1 + ldw 8($key),$t2 + ldw 12($key),$t3 + _srm $rounds,1,$rounds + xor $t0,$s0,$s0 + ldw 16($key),$t0 + xor $t1,$s1,$s1 + ldw 20($key),$t1 + _srm $s0,24,$acc0 + xor $t2,$s2,$s2 + ldw 24($key),$t2 + xor $t3,$s3,$s3 + ldw 28($key),$t3 + _srm $s3,16,$acc1 +L\$dec_loop + _srm $s2,8,$acc2 + ldwx,s $acc0($tbl),$acc0 + _srm $s1,0,$acc3 + ldwx,s $acc1($tbl),$acc1 + _srm $s1,24,$acc4 + ldwx,s $acc2($tbl),$acc2 + _srm $s0,16,$acc5 + ldwx,s $acc3($tbl),$acc3 + _srm $s3,8,$acc6 + ldwx,s $acc4($tbl),$acc4 + _srm $s2,0,$acc7 + ldwx,s $acc5($tbl),$acc5 + _srm $s2,24,$acc8 + ldwx,s $acc6($tbl),$acc6 + _srm $s1,16,$acc9 + ldwx,s $acc7($tbl),$acc7 + _srm $s0,8,$acc10 + ldwx,s $acc8($tbl),$acc8 + _srm $s3,0,$acc11 + ldwx,s $acc9($tbl),$acc9 + _srm $s3,24,$acc12 + ldwx,s $acc10($tbl),$acc10 + _srm $s2,16,$acc13 + ldwx,s $acc11($tbl),$acc11 + _srm $s1,8,$acc14 + ldwx,s $acc12($tbl),$acc12 + _srm $s0,0,$acc15 + ldwx,s $acc13($tbl),$acc13 + ldwx,s $acc14($tbl),$acc14 + ldwx,s $acc15($tbl),$acc15 + addib,= -1,$rounds,L\$dec_last + ldo 32($key),$key + + _ror $acc1,8,$acc1 + xor $acc0,$t0,$t0 + ldw 0($key),$s0 + _ror $acc2,16,$acc2 + xor $acc1,$t0,$t0 + ldw 4($key),$s1 + _ror $acc3,24,$acc3 + xor $acc2,$t0,$t0 + ldw 8($key),$s2 + _ror $acc5,8,$acc5 + xor $acc3,$t0,$t0 + ldw 12($key),$s3 + _ror $acc6,16,$acc6 + xor $acc4,$t1,$t1 + _ror $acc7,24,$acc7 + xor $acc5,$t1,$t1 + _ror $acc9,8,$acc9 + xor $acc6,$t1,$t1 + _ror $acc10,16,$acc10 + xor $acc7,$t1,$t1 + _ror $acc11,24,$acc11 + xor $acc8,$t2,$t2 + _ror $acc13,8,$acc13 + xor $acc9,$t2,$t2 + _ror $acc14,16,$acc14 + xor $acc10,$t2,$t2 + _ror $acc15,24,$acc15 + xor $acc11,$t2,$t2 + xor $acc12,$acc14,$acc14 + xor $acc13,$t3,$t3 + _srm $t0,24,$acc0 + xor $acc14,$t3,$t3 + xor $acc15,$t3,$t3 + _srm $t3,16,$acc1 + + _srm $t2,8,$acc2 + ldwx,s $acc0($tbl),$acc0 + _srm $t1,0,$acc3 + ldwx,s $acc1($tbl),$acc1 + _srm $t1,24,$acc4 + ldwx,s $acc2($tbl),$acc2 + _srm $t0,16,$acc5 + ldwx,s $acc3($tbl),$acc3 + _srm $t3,8,$acc6 + ldwx,s $acc4($tbl),$acc4 + _srm $t2,0,$acc7 + ldwx,s $acc5($tbl),$acc5 + _srm $t2,24,$acc8 + ldwx,s $acc6($tbl),$acc6 + _srm $t1,16,$acc9 + ldwx,s $acc7($tbl),$acc7 + _srm $t0,8,$acc10 + ldwx,s $acc8($tbl),$acc8 + _srm $t3,0,$acc11 + ldwx,s $acc9($tbl),$acc9 + _srm $t3,24,$acc12 + ldwx,s $acc10($tbl),$acc10 + _srm $t2,16,$acc13 + ldwx,s $acc11($tbl),$acc11 + _srm $t1,8,$acc14 + ldwx,s $acc12($tbl),$acc12 + _srm $t0,0,$acc15 + ldwx,s $acc13($tbl),$acc13 + _ror $acc1,8,$acc1 + ldwx,s $acc14($tbl),$acc14 + + _ror $acc2,16,$acc2 + xor $acc0,$s0,$s0 + ldwx,s $acc15($tbl),$acc15 + _ror $acc3,24,$acc3 + xor $acc1,$s0,$s0 + ldw 16($key),$t0 + _ror $acc5,8,$acc5 + xor $acc2,$s0,$s0 + ldw 20($key),$t1 + _ror $acc6,16,$acc6 + xor $acc3,$s0,$s0 + ldw 24($key),$t2 + _ror $acc7,24,$acc7 + xor $acc4,$s1,$s1 + ldw 28($key),$t3 + _ror $acc9,8,$acc9 + xor $acc5,$s1,$s1 + ldw 1024+0($tbl),%r0 ; prefetch td4 + _ror $acc10,16,$acc10 + xor $acc6,$s1,$s1 + ldw 1024+32($tbl),%r0 ; prefetch td4 + _ror $acc11,24,$acc11 + xor $acc7,$s1,$s1 + ldw 1024+64($tbl),%r0 ; prefetch td4 + _ror $acc13,8,$acc13 + xor $acc8,$s2,$s2 + ldw 1024+96($tbl),%r0 ; prefetch td4 + _ror $acc14,16,$acc14 + xor $acc9,$s2,$s2 + ldw 1024+128($tbl),%r0 ; prefetch td4 + _ror $acc15,24,$acc15 + xor $acc10,$s2,$s2 + ldw 1024+160($tbl),%r0 ; prefetch td4 + _srm $s0,24,$acc0 + xor $acc11,$s2,$s2 + ldw 1024+192($tbl),%r0 ; prefetch td4 + xor $acc12,$acc14,$acc14 + xor $acc13,$s3,$s3 + ldw 1024+224($tbl),%r0 ; prefetch td4 + xor $acc14,$s3,$s3 + xor $acc15,$s3,$s3 + b L\$dec_loop + _srm $s3,16,$acc1 + + .ALIGN 16 +L\$dec_last + ldo 1024($tbl),$rounds + _ror $acc1,8,$acc1 + xor $acc0,$t0,$t0 + ldw 0($key),$s0 + _ror $acc2,16,$acc2 + xor $acc1,$t0,$t0 + ldw 4($key),$s1 + _ror $acc3,24,$acc3 + xor $acc2,$t0,$t0 + ldw 8($key),$s2 + _ror $acc5,8,$acc5 + xor $acc3,$t0,$t0 + ldw 12($key),$s3 + _ror $acc6,16,$acc6 + xor $acc4,$t1,$t1 + _ror $acc7,24,$acc7 + xor $acc5,$t1,$t1 + _ror $acc9,8,$acc9 + xor $acc6,$t1,$t1 + _ror $acc10,16,$acc10 + xor $acc7,$t1,$t1 + _ror $acc11,24,$acc11 + xor $acc8,$t2,$t2 + _ror $acc13,8,$acc13 + xor $acc9,$t2,$t2 + _ror $acc14,16,$acc14 + xor $acc10,$t2,$t2 + _ror $acc15,24,$acc15 + xor $acc11,$t2,$t2 + xor $acc12,$acc14,$acc14 + xor $acc13,$t3,$t3 + _srm $t0,24,$acc0 + xor $acc14,$t3,$t3 + xor $acc15,$t3,$t3 + _srm $t3,16,$acc1 + + _srm $t2,8,$acc2 + ldbx $acc0($rounds),$acc0 + _srm $t1,24,$acc4 + ldbx $acc1($rounds),$acc1 + _srm $t0,16,$acc5 + _srm $t1,0,$acc3 + ldbx $acc2($rounds),$acc2 + ldbx $acc3($rounds),$acc3 + _srm $t3,8,$acc6 + ldbx $acc4($rounds),$acc4 + _srm $t2,24,$acc8 + ldbx $acc5($rounds),$acc5 + _srm $t1,16,$acc9 + _srm $t2,0,$acc7 + ldbx $acc6($rounds),$acc6 + ldbx $acc7($rounds),$acc7 + _srm $t0,8,$acc10 + ldbx $acc8($rounds),$acc8 + _srm $t3,24,$acc12 + ldbx $acc9($rounds),$acc9 + _srm $t2,16,$acc13 + _srm $t3,0,$acc11 + ldbx $acc10($rounds),$acc10 + _srm $t1,8,$acc14 + ldbx $acc11($rounds),$acc11 + ldbx $acc12($rounds),$acc12 + ldbx $acc13($rounds),$acc13 + _srm $t0,0,$acc15 + ldbx $acc14($rounds),$acc14 + + dep $acc0,7,8,$acc3 + ldbx $acc15($rounds),$acc15 + dep $acc4,7,8,$acc7 + dep $acc1,15,8,$acc3 + dep $acc5,15,8,$acc7 + dep $acc2,23,8,$acc3 + dep $acc6,23,8,$acc7 + xor $acc3,$s0,$s0 + xor $acc7,$s1,$s1 + dep $acc8,7,8,$acc11 + dep $acc12,7,8,$acc15 + dep $acc9,15,8,$acc11 + dep $acc13,15,8,$acc15 + dep $acc10,23,8,$acc11 + dep $acc14,23,8,$acc15 + xor $acc11,$s2,$s2 + + bv (%r31) + .EXIT + xor $acc15,$s3,$s3 + .PROCEND + + .ALIGN 64 +L\$AES_Td + .WORD 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 + .WORD 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 + .WORD 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 + .WORD 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f + .WORD 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 + .WORD 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 + .WORD 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da + .WORD 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 + .WORD 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd + .WORD 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 + .WORD 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 + .WORD 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 + .WORD 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 + .WORD 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a + .WORD 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 + .WORD 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c + .WORD 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 + .WORD 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a + .WORD 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 + .WORD 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 + .WORD 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 + .WORD 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff + .WORD 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 + .WORD 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb + .WORD 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 + .WORD 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e + .WORD 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 + .WORD 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a + .WORD 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e + .WORD 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 + .WORD 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d + .WORD 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 + .WORD 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd + .WORD 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 + .WORD 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 + .WORD 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 + .WORD 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d + .WORD 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 + .WORD 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 + .WORD 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef + .WORD 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 + .WORD 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 + .WORD 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 + .WORD 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 + .WORD 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 + .WORD 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b + .WORD 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 + .WORD 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 + .WORD 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 + .WORD 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 + .WORD 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 + .WORD 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f + .WORD 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df + .WORD 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f + .WORD 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e + .WORD 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 + .WORD 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 + .WORD 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c + .WORD 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf + .WORD 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 + .WORD 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f + .WORD 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 + .WORD 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 + .WORD 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 + .BYTE 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .BYTE 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .BYTE 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .BYTE 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .BYTE 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .BYTE 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .BYTE 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .BYTE 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .BYTE 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .BYTE 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .BYTE 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .BYTE 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .BYTE 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .BYTE 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .BYTE 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .BYTE 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .BYTE 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .BYTE 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .BYTE 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .BYTE 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .BYTE 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .BYTE 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .BYTE 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .BYTE 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .BYTE 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .BYTE 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .BYTE 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .BYTE 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .BYTE 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .BYTE 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .BYTE 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .BYTE 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + .STRINGZ "AES for PA-RISC, CRYPTOGAMS by " +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + # translate made up instructons: _ror, _srm + s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/ or + + s/_srm(\s+%r[0-9]+),([0-9]+),/ + $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) + : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; + + s/,\*/,/ if ($SIZE_T==4); + s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8); + print $_,"\n"; +} +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/aes-ppc.pl b/app/openssl/crypto/aes/asm/aes-ppc.pl index f82c5e18..7c52cbe5 100644 --- a/app/openssl/crypto/aes/asm/aes-ppc.pl +++ b/app/openssl/crypto/aes/asm/aes-ppc.pl @@ -7,7 +7,7 @@ # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== -# Needs more work: key setup, page boundaries, CBC routine... +# Needs more work: key setup, CBC routine... # # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc @@ -18,7 +18,7 @@ # February 2010 # -# Rescheduling instructions to favour Power6 pipeline gives 10% +# Rescheduling instructions to favour Power6 pipeline gave 10% # performance improvement on the platfrom in question (and marginal # improvement even on others). It should be noted that Power6 fails # to process byte in 18 cycles, only in 23, because it fails to issue @@ -33,11 +33,13 @@ $flavour = shift; if ($flavour =~ /64/) { $SIZE_T =8; + $LRSAVE =2*$SIZE_T; $STU ="stdu"; $POP ="ld"; $PUSH ="std"; } elsif ($flavour =~ /32/) { $SIZE_T =4; + $LRSAVE =$SIZE_T; $STU ="stwu"; $POP ="lwz"; $PUSH ="stw"; @@ -116,15 +118,19 @@ LAES_Te: addi $Tbl0,$Tbl0,`128-8` mtlr r0 blr - .space `32-24` + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` LAES_Td: mflr r0 bcl 20,31,\$+4 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-8-32+2048+256` + addi $Tbl0,$Tbl0,`128-64-8+2048+256` mtlr r0 blr - .space `128-32-24` + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `128-64-9*4` ___ &_data_word( 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, @@ -328,10 +334,9 @@ $code.=<<___; .globl .AES_encrypt .align 7 .AES_encrypt: - mflr r0 $STU $sp,-$FRAME($sp) + mflr r0 - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) @@ -352,7 +357,14 @@ $code.=<<___; $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) + + andi. $t0,$inp,3 + andi. $t1,$out,3 + or. $t0,$t0,$t1 + bne Lenc_unaligned +Lenc_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) @@ -363,8 +375,80 @@ $code.=<<___; stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) + b Lenc_done + +Lenc_unaligned: + subfic $t0,$inp,4096 + subfic $t1,$out,4096 + andi. $t0,$t0,4096-16 + beq Lenc_xpage + andi. $t1,$t1,4096-16 + bne Lenc_unaligned_ok + +Lenc_xpage: + lbz $acc00,0($inp) + lbz $acc01,1($inp) + lbz $acc02,2($inp) + lbz $s0,3($inp) + lbz $acc04,4($inp) + lbz $acc05,5($inp) + lbz $acc06,6($inp) + lbz $s1,7($inp) + lbz $acc08,8($inp) + lbz $acc09,9($inp) + lbz $acc10,10($inp) + insrwi $s0,$acc00,8,0 + lbz $s2,11($inp) + insrwi $s1,$acc04,8,0 + lbz $acc12,12($inp) + insrwi $s0,$acc01,8,8 + lbz $acc13,13($inp) + insrwi $s1,$acc05,8,8 + lbz $acc14,14($inp) + insrwi $s0,$acc02,8,16 + lbz $s3,15($inp) + insrwi $s1,$acc06,8,16 + insrwi $s2,$acc08,8,0 + insrwi $s3,$acc12,8,0 + insrwi $s2,$acc09,8,8 + insrwi $s3,$acc13,8,8 + insrwi $s2,$acc10,8,16 + insrwi $s3,$acc14,8,16 + + bl LAES_Te + bl Lppc_AES_encrypt_compact + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 + stb $acc00,0($out) + extrwi $acc02,$s0,8,16 + stb $acc01,1($out) + stb $acc02,2($out) + extrwi $acc04,$s1,8,0 + stb $s0,3($out) + extrwi $acc05,$s1,8,8 + stb $acc04,4($out) + extrwi $acc06,$s1,8,16 + stb $acc05,5($out) + stb $acc06,6($out) + extrwi $acc08,$s2,8,0 + stb $s1,7($out) + extrwi $acc09,$s2,8,8 + stb $acc08,8($out) + extrwi $acc10,$s2,8,16 + stb $acc09,9($out) + stb $acc10,10($out) + extrwi $acc12,$s3,8,0 + stb $s2,11($out) + extrwi $acc13,$s3,8,8 + stb $acc12,12($out) + extrwi $acc14,$s3,8,16 + stb $acc13,13($out) + stb $acc14,14($out) + stb $s3,15($out) - $POP r0,`$FRAME-$SIZE_T*21`($sp) +Lenc_done: + $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) @@ -388,18 +472,21 @@ $code.=<<___; mtlr r0 addi $sp,$sp,$FRAME blr + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 .align 5 Lppc_AES_encrypt: lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) addi $Tbl1,$Tbl0,3 + lwz $t0,0($key) addi $Tbl2,$Tbl0,2 + lwz $t1,4($key) addi $Tbl3,$Tbl0,1 + lwz $t2,8($key) addi $acc00,$acc00,-1 + lwz $t3,12($key) addi $key,$key,16 xor $s0,$s0,$t0 xor $s1,$s1,$t1 @@ -413,44 +500,44 @@ Lenc_loop: rlwinm $acc02,$s2,`32-24+3`,21,28 rlwinm $acc03,$s3,`32-24+3`,21,28 lwz $t0,0($key) - lwz $t1,4($key) rlwinm $acc04,$s1,`32-16+3`,21,28 + lwz $t1,4($key) rlwinm $acc05,$s2,`32-16+3`,21,28 lwz $t2,8($key) - lwz $t3,12($key) rlwinm $acc06,$s3,`32-16+3`,21,28 + lwz $t3,12($key) rlwinm $acc07,$s0,`32-16+3`,21,28 lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 rlwinm $acc08,$s2,`32-8+3`,21,28 + lwzx $acc01,$Tbl0,$acc01 rlwinm $acc09,$s3,`32-8+3`,21,28 lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 rlwinm $acc10,$s0,`32-8+3`,21,28 + lwzx $acc03,$Tbl0,$acc03 rlwinm $acc11,$s1,`32-8+3`,21,28 lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s3,`0+3`,21,28 + lwzx $acc05,$Tbl1,$acc05 rlwinm $acc13,$s0,`0+3`,21,28 lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s1,`0+3`,21,28 + lwzx $acc07,$Tbl1,$acc07 rlwinm $acc15,$s2,`0+3`,21,28 lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 xor $t0,$t0,$acc00 + lwzx $acc09,$Tbl2,$acc09 xor $t1,$t1,$acc01 lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 xor $t2,$t2,$acc02 + lwzx $acc11,$Tbl2,$acc11 xor $t3,$t3,$acc03 lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 xor $t0,$t0,$acc04 + lwzx $acc13,$Tbl3,$acc13 xor $t1,$t1,$acc05 lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 xor $t2,$t2,$acc06 + lwzx $acc15,$Tbl3,$acc15 xor $t3,$t3,$acc07 xor $t0,$t0,$acc08 xor $t1,$t1,$acc09 @@ -466,60 +553,60 @@ Lenc_loop: addi $Tbl2,$Tbl0,2048 nop lwz $t0,0($key) - lwz $t1,4($key) rlwinm $acc00,$s0,`32-24`,24,31 + lwz $t1,4($key) rlwinm $acc01,$s1,`32-24`,24,31 lwz $t2,8($key) - lwz $t3,12($key) rlwinm $acc02,$s2,`32-24`,24,31 + lwz $t3,12($key) rlwinm $acc03,$s3,`32-24`,24,31 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 - lwz $acc09,`2048+32`($Tbl0) rlwinm $acc04,$s1,`32-16`,24,31 + lwz $acc09,`2048+32`($Tbl0) rlwinm $acc05,$s2,`32-16`,24,31 lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) rlwinm $acc06,$s3,`32-16`,24,31 + lwz $acc11,`2048+96`($Tbl0) rlwinm $acc07,$s0,`32-16`,24,31 lwz $acc12,`2048+128`($Tbl0) - lwz $acc13,`2048+160`($Tbl0) rlwinm $acc08,$s2,`32-8`,24,31 + lwz $acc13,`2048+160`($Tbl0) rlwinm $acc09,$s3,`32-8`,24,31 lwz $acc14,`2048+192`($Tbl0) - lwz $acc15,`2048+224`($Tbl0) rlwinm $acc10,$s0,`32-8`,24,31 + lwz $acc15,`2048+224`($Tbl0) rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc00,$Tbl2,$acc00 - lbzx $acc01,$Tbl2,$acc01 rlwinm $acc12,$s3,`0`,24,31 + lbzx $acc01,$Tbl2,$acc01 rlwinm $acc13,$s0,`0`,24,31 lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 rlwinm $acc14,$s1,`0`,24,31 + lbzx $acc03,$Tbl2,$acc03 rlwinm $acc15,$s2,`0`,24,31 lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 rlwinm $s0,$acc00,24,0,7 + lbzx $acc05,$Tbl2,$acc05 rlwinm $s1,$acc01,24,0,7 lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 rlwinm $s2,$acc02,24,0,7 + lbzx $acc07,$Tbl2,$acc07 rlwinm $s3,$acc03,24,0,7 lbzx $acc08,$Tbl2,$acc08 - lbzx $acc09,$Tbl2,$acc09 rlwimi $s0,$acc04,16,8,15 + lbzx $acc09,$Tbl2,$acc09 rlwimi $s1,$acc05,16,8,15 lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 rlwimi $s2,$acc06,16,8,15 + lbzx $acc11,$Tbl2,$acc11 rlwimi $s3,$acc07,16,8,15 lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 rlwimi $s0,$acc08,8,16,23 + lbzx $acc13,$Tbl2,$acc13 rlwimi $s1,$acc09,8,16,23 lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 rlwimi $s2,$acc10,8,16,23 + lbzx $acc15,$Tbl2,$acc15 rlwimi $s3,$acc11,8,16,23 or $s0,$s0,$acc12 or $s1,$s1,$acc13 @@ -530,29 +617,31 @@ Lenc_loop: xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .align 4 Lppc_AES_encrypt_compact: lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) addi $Tbl1,$Tbl0,2048 + lwz $t0,0($key) lis $mask80,0x8080 + lwz $t1,4($key) lis $mask1b,0x1b1b - addi $key,$key,16 + lwz $t2,8($key) ori $mask80,$mask80,0x8080 + lwz $t3,12($key) ori $mask1b,$mask1b,0x1b1b + addi $key,$key,16 mtctr $acc00 .align 4 Lenc_compact_loop: xor $s0,$s0,$t0 xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 rlwinm $acc00,$s0,`32-24`,24,31 + xor $s2,$s2,$t2 rlwinm $acc01,$s1,`32-24`,24,31 + xor $s3,$s3,$t3 rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 rlwinm $acc04,$s1,`32-16`,24,31 @@ -560,48 +649,48 @@ Lenc_compact_loop: rlwinm $acc06,$s3,`32-16`,24,31 rlwinm $acc07,$s0,`32-16`,24,31 lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 rlwinm $acc08,$s2,`32-8`,24,31 + lbzx $acc01,$Tbl1,$acc01 rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 + lbzx $acc03,$Tbl1,$acc03 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s3,`0`,24,31 + lbzx $acc05,$Tbl1,$acc05 rlwinm $acc13,$s0,`0`,24,31 lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s1,`0`,24,31 + lbzx $acc07,$Tbl1,$acc07 rlwinm $acc15,$s2,`0`,24,31 lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 rlwinm $s0,$acc00,24,0,7 + lbzx $acc09,$Tbl1,$acc09 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 rlwinm $s2,$acc02,24,0,7 + lbzx $acc11,$Tbl1,$acc11 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 rlwimi $s0,$acc04,16,8,15 + lbzx $acc13,$Tbl1,$acc13 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 rlwimi $s2,$acc06,16,8,15 + lbzx $acc15,$Tbl1,$acc15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 lwz $t0,0($key) - lwz $t1,4($key) or $s0,$s0,$acc12 + lwz $t1,4($key) or $s1,$s1,$acc13 lwz $t2,8($key) - lwz $t3,12($key) or $s2,$s2,$acc14 + lwz $t3,12($key) or $s3,$s3,$acc15 addi $key,$key,16 @@ -612,12 +701,12 @@ Lenc_compact_loop: and $acc02,$s2,$mask80 and $acc03,$s3,$mask80 srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f + srwi $acc05,$acc01,7 andc $acc09,$s1,$mask80 + srwi $acc06,$acc02,7 andc $acc10,$s2,$mask80 + srwi $acc07,$acc03,7 andc $acc11,$s3,$mask80 sub $acc00,$acc00,$acc04 # r1-(r1>>7) sub $acc01,$acc01,$acc05 @@ -633,32 +722,32 @@ Lenc_compact_loop: and $acc03,$acc03,$mask1b xor $acc00,$acc00,$acc08 # r2 xor $acc01,$acc01,$acc09 + rotlwi $acc12,$s0,16 # ROTATE(r0,16) xor $acc02,$acc02,$acc10 + rotlwi $acc13,$s1,16 xor $acc03,$acc03,$acc11 + rotlwi $acc14,$s2,16 - rotlwi $acc12,$s0,16 # ROTATE(r0,16) - rotlwi $acc13,$s1,16 - rotlwi $acc14,$s2,16 - rotlwi $acc15,$s3,16 xor $s0,$s0,$acc00 # r0^r2 + rotlwi $acc15,$s3,16 xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) + xor $s2,$s2,$acc02 rotrwi $s1,$s1,24 + xor $s3,$s3,$acc03 rotrwi $s2,$s2,24 - rotrwi $s3,$s3,24 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 + rotrwi $s3,$s3,24 xor $s1,$s1,$acc01 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) - rotlwi $acc09,$acc13,8 - rotlwi $acc10,$acc14,8 - rotlwi $acc11,$acc15,8 xor $s0,$s0,$acc12 # + rotlwi $acc09,$acc13,8 xor $s1,$s1,$acc13 + rotlwi $acc10,$acc14,8 xor $s2,$s2,$acc14 + rotlwi $acc11,$acc15,8 xor $s3,$s3,$acc15 xor $s0,$s0,$acc08 # xor $s1,$s1,$acc09 @@ -673,14 +762,15 @@ Lenc_compact_done: xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .globl .AES_decrypt .align 7 .AES_decrypt: - mflr r0 $STU $sp,-$FRAME($sp) + mflr r0 - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) @@ -701,7 +791,14 @@ Lenc_compact_done: $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) + andi. $t0,$inp,3 + andi. $t1,$out,3 + or. $t0,$t0,$t1 + bne Ldec_unaligned + +Ldec_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) @@ -712,8 +809,80 @@ Lenc_compact_done: stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) + b Ldec_done + +Ldec_unaligned: + subfic $t0,$inp,4096 + subfic $t1,$out,4096 + andi. $t0,$t0,4096-16 + beq Ldec_xpage + andi. $t1,$t1,4096-16 + bne Ldec_unaligned_ok + +Ldec_xpage: + lbz $acc00,0($inp) + lbz $acc01,1($inp) + lbz $acc02,2($inp) + lbz $s0,3($inp) + lbz $acc04,4($inp) + lbz $acc05,5($inp) + lbz $acc06,6($inp) + lbz $s1,7($inp) + lbz $acc08,8($inp) + lbz $acc09,9($inp) + lbz $acc10,10($inp) + insrwi $s0,$acc00,8,0 + lbz $s2,11($inp) + insrwi $s1,$acc04,8,0 + lbz $acc12,12($inp) + insrwi $s0,$acc01,8,8 + lbz $acc13,13($inp) + insrwi $s1,$acc05,8,8 + lbz $acc14,14($inp) + insrwi $s0,$acc02,8,16 + lbz $s3,15($inp) + insrwi $s1,$acc06,8,16 + insrwi $s2,$acc08,8,0 + insrwi $s3,$acc12,8,0 + insrwi $s2,$acc09,8,8 + insrwi $s3,$acc13,8,8 + insrwi $s2,$acc10,8,16 + insrwi $s3,$acc14,8,16 + + bl LAES_Td + bl Lppc_AES_decrypt_compact - $POP r0,`$FRAME-$SIZE_T*21`($sp) + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 + stb $acc00,0($out) + extrwi $acc02,$s0,8,16 + stb $acc01,1($out) + stb $acc02,2($out) + extrwi $acc04,$s1,8,0 + stb $s0,3($out) + extrwi $acc05,$s1,8,8 + stb $acc04,4($out) + extrwi $acc06,$s1,8,16 + stb $acc05,5($out) + stb $acc06,6($out) + extrwi $acc08,$s2,8,0 + stb $s1,7($out) + extrwi $acc09,$s2,8,8 + stb $acc08,8($out) + extrwi $acc10,$s2,8,16 + stb $acc09,9($out) + stb $acc10,10($out) + extrwi $acc12,$s3,8,0 + stb $s2,11($out) + extrwi $acc13,$s3,8,8 + stb $acc12,12($out) + extrwi $acc14,$s3,8,16 + stb $acc13,13($out) + stb $acc14,14($out) + stb $s3,15($out) + +Ldec_done: + $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) @@ -737,18 +906,21 @@ Lenc_compact_done: mtlr r0 addi $sp,$sp,$FRAME blr + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 .align 5 Lppc_AES_decrypt: lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) addi $Tbl1,$Tbl0,3 + lwz $t0,0($key) addi $Tbl2,$Tbl0,2 + lwz $t1,4($key) addi $Tbl3,$Tbl0,1 + lwz $t2,8($key) addi $acc00,$acc00,-1 + lwz $t3,12($key) addi $key,$key,16 xor $s0,$s0,$t0 xor $s1,$s1,$t1 @@ -762,44 +934,44 @@ Ldec_loop: rlwinm $acc02,$s2,`32-24+3`,21,28 rlwinm $acc03,$s3,`32-24+3`,21,28 lwz $t0,0($key) - lwz $t1,4($key) rlwinm $acc04,$s3,`32-16+3`,21,28 + lwz $t1,4($key) rlwinm $acc05,$s0,`32-16+3`,21,28 lwz $t2,8($key) - lwz $t3,12($key) rlwinm $acc06,$s1,`32-16+3`,21,28 + lwz $t3,12($key) rlwinm $acc07,$s2,`32-16+3`,21,28 lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 rlwinm $acc08,$s2,`32-8+3`,21,28 + lwzx $acc01,$Tbl0,$acc01 rlwinm $acc09,$s3,`32-8+3`,21,28 lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 rlwinm $acc10,$s0,`32-8+3`,21,28 + lwzx $acc03,$Tbl0,$acc03 rlwinm $acc11,$s1,`32-8+3`,21,28 lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s1,`0+3`,21,28 + lwzx $acc05,$Tbl1,$acc05 rlwinm $acc13,$s2,`0+3`,21,28 lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s3,`0+3`,21,28 + lwzx $acc07,$Tbl1,$acc07 rlwinm $acc15,$s0,`0+3`,21,28 lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 xor $t0,$t0,$acc00 + lwzx $acc09,$Tbl2,$acc09 xor $t1,$t1,$acc01 lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 xor $t2,$t2,$acc02 + lwzx $acc11,$Tbl2,$acc11 xor $t3,$t3,$acc03 lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 xor $t0,$t0,$acc04 + lwzx $acc13,$Tbl3,$acc13 xor $t1,$t1,$acc05 lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 xor $t2,$t2,$acc06 + lwzx $acc15,$Tbl3,$acc15 xor $t3,$t3,$acc07 xor $t0,$t0,$acc08 xor $t1,$t1,$acc09 @@ -815,56 +987,56 @@ Ldec_loop: addi $Tbl2,$Tbl0,2048 nop lwz $t0,0($key) - lwz $t1,4($key) rlwinm $acc00,$s0,`32-24`,24,31 + lwz $t1,4($key) rlwinm $acc01,$s1,`32-24`,24,31 lwz $t2,8($key) - lwz $t3,12($key) rlwinm $acc02,$s2,`32-24`,24,31 + lwz $t3,12($key) rlwinm $acc03,$s3,`32-24`,24,31 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 - lwz $acc09,`2048+32`($Tbl0) rlwinm $acc04,$s3,`32-16`,24,31 + lwz $acc09,`2048+32`($Tbl0) rlwinm $acc05,$s0,`32-16`,24,31 lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) lbzx $acc00,$Tbl2,$acc00 + lwz $acc11,`2048+96`($Tbl0) lbzx $acc01,$Tbl2,$acc01 lwz $acc12,`2048+128`($Tbl0) - lwz $acc13,`2048+160`($Tbl0) rlwinm $acc06,$s1,`32-16`,24,31 + lwz $acc13,`2048+160`($Tbl0) rlwinm $acc07,$s2,`32-16`,24,31 lwz $acc14,`2048+192`($Tbl0) - lwz $acc15,`2048+224`($Tbl0) rlwinm $acc08,$s2,`32-8`,24,31 + lwz $acc15,`2048+224`($Tbl0) rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 + lbzx $acc03,$Tbl2,$acc03 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 rlwinm $acc12,$s1,`0`,24,31 + lbzx $acc05,$Tbl2,$acc05 rlwinm $acc13,$s2,`0`,24,31 lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 rlwinm $acc14,$s3,`0`,24,31 + lbzx $acc07,$Tbl2,$acc07 rlwinm $acc15,$s0,`0`,24,31 lbzx $acc08,$Tbl2,$acc08 - lbzx $acc09,$Tbl2,$acc09 rlwinm $s0,$acc00,24,0,7 + lbzx $acc09,$Tbl2,$acc09 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 rlwinm $s2,$acc02,24,0,7 + lbzx $acc11,$Tbl2,$acc11 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 rlwimi $s0,$acc04,16,8,15 + lbzx $acc13,$Tbl2,$acc13 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 rlwimi $s2,$acc06,16,8,15 + lbzx $acc15,$Tbl2,$acc15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 @@ -879,20 +1051,22 @@ Ldec_loop: xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .align 4 Lppc_AES_decrypt_compact: lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) addi $Tbl1,$Tbl0,2048 + lwz $t0,0($key) lis $mask80,0x8080 + lwz $t1,4($key) lis $mask1b,0x1b1b - addi $key,$key,16 + lwz $t2,8($key) ori $mask80,$mask80,0x8080 + lwz $t3,12($key) ori $mask1b,$mask1b,0x1b1b + addi $key,$key,16 ___ $code.=<<___ if ($SIZE_T==8); insrdi $mask80,$mask80,32,0 @@ -904,10 +1078,10 @@ $code.=<<___; Ldec_compact_loop: xor $s0,$s0,$t0 xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 rlwinm $acc00,$s0,`32-24`,24,31 + xor $s2,$s2,$t2 rlwinm $acc01,$s1,`32-24`,24,31 + xor $s3,$s3,$t3 rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 rlwinm $acc04,$s3,`32-16`,24,31 @@ -915,48 +1089,48 @@ Ldec_compact_loop: rlwinm $acc06,$s1,`32-16`,24,31 rlwinm $acc07,$s2,`32-16`,24,31 lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 rlwinm $acc08,$s2,`32-8`,24,31 + lbzx $acc01,$Tbl1,$acc01 rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 + lbzx $acc03,$Tbl1,$acc03 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s1,`0`,24,31 + lbzx $acc05,$Tbl1,$acc05 rlwinm $acc13,$s2,`0`,24,31 lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s3,`0`,24,31 + lbzx $acc07,$Tbl1,$acc07 rlwinm $acc15,$s0,`0`,24,31 lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 rlwinm $s0,$acc00,24,0,7 + lbzx $acc09,$Tbl1,$acc09 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 rlwinm $s2,$acc02,24,0,7 + lbzx $acc11,$Tbl1,$acc11 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 rlwimi $s0,$acc04,16,8,15 + lbzx $acc13,$Tbl1,$acc13 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 rlwimi $s2,$acc06,16,8,15 + lbzx $acc15,$Tbl1,$acc15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 lwz $t0,0($key) - lwz $t1,4($key) or $s0,$s0,$acc12 + lwz $t1,4($key) or $s1,$s1,$acc13 lwz $t2,8($key) - lwz $t3,12($key) or $s2,$s2,$acc14 + lwz $t3,12($key) or $s3,$s3,$acc15 addi $key,$key,16 @@ -1030,12 +1204,12 @@ $code.=<<___ if ($SIZE_T==4); and $acc02,$s2,$mask80 and $acc03,$s3,$mask80 srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f + srwi $acc05,$acc01,7 andc $acc09,$s1,$mask80 + srwi $acc06,$acc02,7 andc $acc10,$s2,$mask80 + srwi $acc07,$acc03,7 andc $acc11,$s3,$mask80 sub $acc00,$acc00,$acc04 # r1-(r1>>7) sub $acc01,$acc01,$acc05 @@ -1059,12 +1233,12 @@ $code.=<<___ if ($SIZE_T==4); and $acc06,$acc02,$mask80 and $acc07,$acc03,$mask80 srwi $acc08,$acc04,7 # r1>>7 - srwi $acc09,$acc05,7 - srwi $acc10,$acc06,7 - srwi $acc11,$acc07,7 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f + srwi $acc09,$acc05,7 andc $acc13,$acc01,$mask80 + srwi $acc10,$acc06,7 andc $acc14,$acc02,$mask80 + srwi $acc11,$acc07,7 andc $acc15,$acc03,$mask80 sub $acc04,$acc04,$acc08 # r1-(r1>>7) sub $acc05,$acc05,$acc09 @@ -1085,13 +1259,13 @@ $code.=<<___ if ($SIZE_T==4); and $acc08,$acc04,$mask80 # r1=r4&0x80808080 and $acc09,$acc05,$mask80 - and $acc10,$acc06,$mask80 - and $acc11,$acc07,$mask80 srwi $acc12,$acc08,7 # r1>>7 + and $acc10,$acc06,$mask80 srwi $acc13,$acc09,7 + and $acc11,$acc07,$mask80 srwi $acc14,$acc10,7 - srwi $acc15,$acc11,7 sub $acc08,$acc08,$acc12 # r1-(r1>>7) + srwi $acc15,$acc11,7 sub $acc09,$acc09,$acc13 sub $acc10,$acc10,$acc14 sub $acc11,$acc11,$acc15 @@ -1124,10 +1298,10 @@ ___ $code.=<<___; rotrwi $s0,$s0,8 # = ROTATE(r0,8) rotrwi $s1,$s1,8 - rotrwi $s2,$s2,8 - rotrwi $s3,$s3,8 xor $s0,$s0,$acc00 # ^= r2^r0 + rotrwi $s2,$s2,8 xor $s1,$s1,$acc01 + rotrwi $s3,$s3,8 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 xor $acc00,$acc00,$acc08 @@ -1135,32 +1309,32 @@ $code.=<<___; xor $acc02,$acc02,$acc10 xor $acc03,$acc03,$acc11 xor $s0,$s0,$acc04 # ^= r4^r0 - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 rotrwi $acc00,$acc00,24 + xor $s1,$s1,$acc05 rotrwi $acc01,$acc01,24 + xor $s2,$s2,$acc06 rotrwi $acc02,$acc02,24 + xor $s3,$s3,$acc07 rotrwi $acc03,$acc03,24 xor $acc04,$acc04,$acc08 xor $acc05,$acc05,$acc09 xor $acc06,$acc06,$acc10 xor $acc07,$acc07,$acc11 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 rotrwi $acc04,$acc04,16 + xor $s1,$s1,$acc09 rotrwi $acc05,$acc05,16 + xor $s2,$s2,$acc10 rotrwi $acc06,$acc06,16 + xor $s3,$s3,$acc11 rotrwi $acc07,$acc07,16 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 rotrwi $acc08,$acc08,8 + xor $s1,$s1,$acc01 rotrwi $acc09,$acc09,8 + xor $s2,$s2,$acc02 rotrwi $acc10,$acc10,8 + xor $s3,$s3,$acc03 rotrwi $acc11,$acc11,8 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) xor $s1,$s1,$acc05 @@ -1179,7 +1353,9 @@ Ldec_compact_done: xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr -.long 0 + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .asciz "AES for PPC, CRYPTOGAMS by " .align 7 ___ diff --git a/app/openssl/crypto/aes/asm/aes-s390x.pl b/app/openssl/crypto/aes/asm/aes-s390x.pl index 7e018892..e75dcd03 100644 --- a/app/openssl/crypto/aes/asm/aes-s390x.pl +++ b/app/openssl/crypto/aes/asm/aes-s390x.pl @@ -44,12 +44,57 @@ # Unlike previous version hardware support detection takes place only # at the moment of key schedule setup, which is denoted in key->rounds. # This is done, because deferred key setup can't be made MT-safe, not -# for key lengthes longer than 128 bits. +# for keys longer than 128 bits. # # Add AES_cbc_encrypt, which gives incredible performance improvement, # it was measured to be ~6.6x. It's less than previously mentioned 8x, # because software implementation was optimized. +# May 2010. +# +# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x +# performance improvement over "generic" counter mode routine relying +# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers +# to the fact that exact throughput value depends on current stack +# frame alignment within 4KB page. In worst case you get ~75% of the +# maximum, but *on average* it would be as much as ~98%. Meaning that +# worst case is unlike, it's like hitting ravine on plateau. + +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 2x better than code generated by gcc 4.3. + +# December 2010. +# +# Add support for z196 "cipher message with counter" instruction. +# Note however that it's disengaged, because it was measured to +# perform ~12% worse than vanilla km-based code... + +# February 2011. +# +# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes +# instructions, which deliver ~70% improvement at 8KB block size over +# vanilla km-based code, 37% - at most like 512-bytes block size. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + $softonly=0; # allow hardware support $t0="%r0"; $mask="%r0"; @@ -69,6 +114,8 @@ $rounds="%r13"; $ra="%r14"; $sp="%r15"; +$stdframe=16*$SIZE_T+4*8; + sub _data_word() { my $i; while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } @@ -210,7 +257,7 @@ $code.=<<___ if (!$softonly); .Lesoft: ___ $code.=<<___; - stmg %r3,$ra,24($sp) + stm${g} %r3,$ra,3*$SIZE_T($sp) llgf $s0,0($inp) llgf $s1,4($inp) @@ -220,20 +267,20 @@ $code.=<<___; larl $tbl,AES_Te bras $ra,_s390x_AES_encrypt - lg $out,24($sp) + l${g} $out,3*$SIZE_T($sp) st $s0,0($out) st $s1,4($out) st $s2,8($out) st $s3,12($out) - lmg %r6,$ra,48($sp) + lm${g} %r6,$ra,6*$SIZE_T($sp) br $ra .size AES_encrypt,.-AES_encrypt .type _s390x_AES_encrypt,\@function .align 16 _s390x_AES_encrypt: - stg $ra,152($sp) + st${g} $ra,15*$SIZE_T($sp) x $s0,0($key) x $s1,4($key) x $s2,8($key) @@ -397,7 +444,7 @@ _s390x_AES_encrypt: or $s2,$i3 or $s3,$t3 - lg $ra,152($sp) + l${g} $ra,15*$SIZE_T($sp) xr $s0,$t0 xr $s1,$t2 x $s2,24($key) @@ -536,7 +583,7 @@ $code.=<<___ if (!$softonly); .Ldsoft: ___ $code.=<<___; - stmg %r3,$ra,24($sp) + stm${g} %r3,$ra,3*$SIZE_T($sp) llgf $s0,0($inp) llgf $s1,4($inp) @@ -546,20 +593,20 @@ $code.=<<___; larl $tbl,AES_Td bras $ra,_s390x_AES_decrypt - lg $out,24($sp) + l${g} $out,3*$SIZE_T($sp) st $s0,0($out) st $s1,4($out) st $s2,8($out) st $s3,12($out) - lmg %r6,$ra,48($sp) + lm${g} %r6,$ra,6*$SIZE_T($sp) br $ra .size AES_decrypt,.-AES_decrypt .type _s390x_AES_decrypt,\@function .align 16 _s390x_AES_decrypt: - stg $ra,152($sp) + st${g} $ra,15*$SIZE_T($sp) x $s0,0($key) x $s1,4($key) x $s2,8($key) @@ -703,7 +750,7 @@ _s390x_AES_decrypt: nr $i1,$mask nr $i2,$mask - lg $ra,152($sp) + l${g} $ra,15*$SIZE_T($sp) or $s1,$t1 l $t0,16($key) l $t1,20($key) @@ -732,14 +779,15 @@ ___ $code.=<<___; # void AES_set_encrypt_key(const unsigned char *in, int bits, # AES_KEY *key) { -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,\@function +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,\@function .align 16 -AES_set_encrypt_key: +private_AES_set_encrypt_key: +_s390x_AES_set_encrypt_key: lghi $t0,0 - clgr $inp,$t0 + cl${g}r $inp,$t0 je .Lminus1 - clgr $key,$t0 + cl${g}r $key,$t0 je .Lminus1 lghi $t0,128 @@ -789,7 +837,8 @@ $code.=<<___ if (!$softonly); je 1f lg %r1,24($inp) stg %r1,24($key) -1: st $bits,236($key) # save bits +1: st $bits,236($key) # save bits [for debugging purposes] + lgr $t0,%r5 st %r5,240($key) # save km code lghi %r2,0 br %r14 @@ -797,7 +846,7 @@ ___ $code.=<<___; .align 16 .Lekey_internal: - stmg %r6,%r13,48($sp) # all non-volatile regs + stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key larl $tbl,AES_Te+2048 @@ -857,8 +906,9 @@ $code.=<<___; la $key,16($key) # key+=4 la $t3,4($t3) # i++ brct $rounds,.L128_loop + lghi $t0,10 lghi %r2,0 - lmg %r6,%r13,48($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -905,8 +955,9 @@ $code.=<<___; st $s2,32($key) st $s3,36($key) brct $rounds,.L192_continue + lghi $t0,12 lghi %r2,0 - lmg %r6,%r13,48($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -967,8 +1018,9 @@ $code.=<<___; st $s2,40($key) st $s3,44($key) brct $rounds,.L256_continue + lghi $t0,14 lghi %r2,0 - lmg %r6,%r13,48($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -1011,42 +1063,34 @@ $code.=<<___; .Lminus1: lghi %r2,-1 br $ra -.size AES_set_encrypt_key,.-AES_set_encrypt_key +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key # void AES_set_decrypt_key(const unsigned char *in, int bits, # AES_KEY *key) { -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,\@function +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,\@function .align 16 -AES_set_decrypt_key: - stg $key,32($sp) # I rely on AES_set_encrypt_key to - stg $ra,112($sp) # save non-volatile registers! - bras $ra,AES_set_encrypt_key - lg $key,32($sp) - lg $ra,112($sp) +private_AES_set_decrypt_key: + #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to + st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! + bras $ra,_s390x_AES_set_encrypt_key + #l${g} $key,4*$SIZE_T($sp) + l${g} $ra,14*$SIZE_T($sp) ltgr %r2,%r2 bnzr $ra ___ $code.=<<___ if (!$softonly); - l $t0,240($key) + #l $t0,240($key) lhi $t1,16 cr $t0,$t1 jl .Lgo oill $t0,0x80 # set "decrypt" bit st $t0,240($key) br $ra - -.align 16 -.Ldkey_internal: - stg $key,32($sp) - stg $ra,40($sp) - bras $ra,.Lekey_internal - lg $key,32($sp) - lg $ra,40($sp) ___ $code.=<<___; - -.Lgo: llgf $rounds,240($key) +.align 16 +.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) la $i1,0($key) sllg $i2,$rounds,4 la $i2,0($i2,$key) @@ -1123,13 +1167,14 @@ $code.=<<___; la $key,4($key) brct $rounds,.Lmix - lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key! + lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! lghi %r2,0 br $ra -.size AES_set_decrypt_key,.-AES_set_decrypt_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key ___ -#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, +######################################################################## +# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, # size_t length, const AES_KEY *key, # unsigned char *ivec, const int enc) { @@ -1163,7 +1208,7 @@ $code.=<<___ if (!$softonly); l %r0,240($key) # load kmc code lghi $key,15 # res=len%16, len-=res; ngr $key,$len - slgr $len,$key + sl${g}r $len,$key la %r1,16($sp) # parameter block - ivec || key jz .Lkmc_truncated .long 0xb92f0042 # kmc %r4,%r2 @@ -1181,34 +1226,34 @@ $code.=<<___ if (!$softonly); tmll %r0,0x80 jnz .Lkmc_truncated_dec lghi %r1,0 - stg %r1,128($sp) - stg %r1,136($sp) + stg %r1,16*$SIZE_T($sp) + stg %r1,16*$SIZE_T+8($sp) bras %r1,1f - mvc 128(1,$sp),0($inp) + mvc 16*$SIZE_T(1,$sp),0($inp) 1: ex $key,0(%r1) la %r1,16($sp) # restore parameter block - la $inp,128($sp) + la $inp,16*$SIZE_T($sp) lghi $len,16 .long 0xb92f0042 # kmc %r4,%r2 j .Lkmc_done .align 16 .Lkmc_truncated_dec: - stg $out,64($sp) - la $out,128($sp) + st${g} $out,4*$SIZE_T($sp) + la $out,16*$SIZE_T($sp) lghi $len,16 .long 0xb92f0042 # kmc %r4,%r2 - lg $out,64($sp) + l${g} $out,4*$SIZE_T($sp) bras %r1,2f - mvc 0(1,$out),128($sp) + mvc 0(1,$out),16*$SIZE_T($sp) 2: ex $key,0(%r1) j .Lkmc_done .align 16 .Lcbc_software: ___ $code.=<<___; - stmg $key,$ra,40($sp) + stm${g} $key,$ra,5*$SIZE_T($sp) lhi %r0,0 - cl %r0,164($sp) + cl %r0,`$stdframe+$SIZE_T-4`($sp) je .Lcbc_decrypt larl $tbl,AES_Te @@ -1219,10 +1264,10 @@ $code.=<<___; llgf $s3,12($ivp) lghi $t0,16 - slgr $len,$t0 + sl${g}r $len,$t0 brc 4,.Lcbc_enc_tail # if borrow .Lcbc_enc_loop: - stmg $inp,$out,16($sp) + stm${g} $inp,$out,2*$SIZE_T($sp) x $s0,0($inp) x $s1,4($inp) x $s2,8($inp) @@ -1231,7 +1276,7 @@ $code.=<<___; bras $ra,_s390x_AES_encrypt - lmg $inp,$key,16($sp) + lm${g} $inp,$key,2*$SIZE_T($sp) st $s0,0($out) st $s1,4($out) st $s2,8($out) @@ -1240,33 +1285,33 @@ $code.=<<___; la $inp,16($inp) la $out,16($out) lghi $t0,16 - ltgr $len,$len + lt${g}r $len,$len jz .Lcbc_enc_done - slgr $len,$t0 + sl${g}r $len,$t0 brc 4,.Lcbc_enc_tail # if borrow j .Lcbc_enc_loop .align 16 .Lcbc_enc_done: - lg $ivp,48($sp) + l${g} $ivp,6*$SIZE_T($sp) st $s0,0($ivp) st $s1,4($ivp) st $s2,8($ivp) st $s3,12($ivp) - lmg %r7,$ra,56($sp) + lm${g} %r7,$ra,7*$SIZE_T($sp) br $ra .align 16 .Lcbc_enc_tail: aghi $len,15 lghi $t0,0 - stg $t0,128($sp) - stg $t0,136($sp) + stg $t0,16*$SIZE_T($sp) + stg $t0,16*$SIZE_T+8($sp) bras $t1,3f - mvc 128(1,$sp),0($inp) + mvc 16*$SIZE_T(1,$sp),0($inp) 3: ex $len,0($t1) lghi $len,0 - la $inp,128($sp) + la $inp,16*$SIZE_T($sp) j .Lcbc_enc_loop .align 16 @@ -1275,10 +1320,10 @@ $code.=<<___; lg $t0,0($ivp) lg $t1,8($ivp) - stmg $t0,$t1,128($sp) + stmg $t0,$t1,16*$SIZE_T($sp) .Lcbc_dec_loop: - stmg $inp,$out,16($sp) + stm${g} $inp,$out,2*$SIZE_T($sp) llgf $s0,0($inp) llgf $s1,4($inp) llgf $s2,8($inp) @@ -1287,7 +1332,7 @@ $code.=<<___; bras $ra,_s390x_AES_decrypt - lmg $inp,$key,16($sp) + lm${g} $inp,$key,2*$SIZE_T($sp) sllg $s0,$s0,32 sllg $s2,$s2,32 lr $s0,$s1 @@ -1295,15 +1340,15 @@ $code.=<<___; lg $t0,0($inp) lg $t1,8($inp) - xg $s0,128($sp) - xg $s2,136($sp) + xg $s0,16*$SIZE_T($sp) + xg $s2,16*$SIZE_T+8($sp) lghi $s1,16 - slgr $len,$s1 + sl${g}r $len,$s1 brc 4,.Lcbc_dec_tail # if borrow brc 2,.Lcbc_dec_done # if zero stg $s0,0($out) stg $s2,8($out) - stmg $t0,$t1,128($sp) + stmg $t0,$t1,16*$SIZE_T($sp) la $inp,16($inp) la $out,16($out) @@ -1313,7 +1358,7 @@ $code.=<<___; stg $s0,0($out) stg $s2,8($out) .Lcbc_dec_exit: - lmg $ivp,$ra,48($sp) + lm${g} %r6,$ra,6*$SIZE_T($sp) stmg $t0,$t1,0($ivp) br $ra @@ -1321,19 +1366,872 @@ $code.=<<___; .align 16 .Lcbc_dec_tail: aghi $len,15 - stg $s0,128($sp) - stg $s2,136($sp) + stg $s0,16*$SIZE_T($sp) + stg $s2,16*$SIZE_T+8($sp) bras $s1,4f - mvc 0(1,$out),128($sp) + mvc 0(1,$out),16*$SIZE_T($sp) 4: ex $len,0($s1) j .Lcbc_dec_exit .size AES_cbc_encrypt,.-AES_cbc_encrypt -.comm OPENSSL_s390xcap_P,8,8 +___ +} +######################################################################## +# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, +# size_t blocks, const AES_KEY *key, +# const unsigned char *ivec) +{ +my $inp="%r2"; +my $out="%r4"; # blocks and out are swapped +my $len="%r3"; +my $key="%r5"; my $iv0="%r5"; +my $ivp="%r6"; +my $fp ="%r7"; + +$code.=<<___; +.globl AES_ctr32_encrypt +.type AES_ctr32_encrypt,\@function +.align 16 +AES_ctr32_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, $out and $len + xgr %r4,%r3 + xgr %r3,%r4 + llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case +___ +$code.=<<___ if (!$softonly); + l %r0,240($key) + lhi %r1,16 + clr %r0,%r1 + jl .Lctr32_software + + stm${g} %r6,$s3,6*$SIZE_T($sp) + + slgr $out,$inp + la %r1,0($key) # %r1 is permanent copy of $key + lg $iv0,0($ivp) # load ivec + lg $ivp,8($ivp) + + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer + lghi $s1,-4096 + algr $s0,$sp + lgr $fp,$sp + ngr $s0,$s1 # align at page boundary + slgr $fp,$s0 # total buffer size + lgr $s2,$sp + lghi $s1,1024+16 # sl[g]fi is extended-immediate facility + slgr $fp,$s1 # deduct reservation to get usable buffer size + # buffer size is at lest 256 and at most 3072+256-16 + + la $sp,1024($s0) # alloca + srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 + st${g} $s2,0($sp) # back-chain + st${g} $fp,$SIZE_T($sp) + + slgr $len,$fp + brc 1,.Lctr32_hw_switch # not zero, no borrow + algr $fp,$len # input is shorter than allocated buffer + lghi $len,0 + st${g} $fp,$SIZE_T($sp) + +.Lctr32_hw_switch: +___ +$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower + larl $s0,OPENSSL_s390xcap_P + lg $s0,8($s0) + tmhh $s0,0x0004 # check for message_security-assist-4 + jz .Lctr32_km_loop + + llgfr $s0,%r0 + lgr $s1,%r1 + lghi %r0,0 + la %r1,16($sp) + .long 0xb92d2042 # kmctr %r4,%r2,%r2 + + llihh %r0,0x8000 # check if kmctr supports the function code + srlg %r0,%r0,0($s0) + ng %r0,16($sp) + lgr %r0,$s0 + lgr %r1,$s1 + jz .Lctr32_km_loop + +####### kmctr code + algr $out,$inp # restore $out + lgr $s1,$len # $s1 undertakes $len + j .Lctr32_kmctr_loop +.align 16 +.Lctr32_kmctr_loop: + la $s2,16($sp) + lgr $s3,$fp +.Lctr32_kmctr_prepare: + stg $iv0,0($s2) + stg $ivp,8($s2) + la $s2,16($s2) + ahi $ivp,1 # 32-bit increment, preserves upper half + brct $s3,.Lctr32_kmctr_prepare + + #la $inp,0($inp) # inp + sllg $len,$fp,4 # len + #la $out,0($out) # out + la $s2,16($sp) # iv + .long 0xb92da042 # kmctr $out,$s2,$inp + brc 1,.-4 # pay attention to "partial completion" + + slgr $s1,$fp + brc 1,.Lctr32_kmctr_loop # not zero, no borrow + algr $fp,$s1 + lghi $s1,0 + brc 4+1,.Lctr32_kmctr_loop # not zero + + l${g} $sp,0($sp) + lm${g} %r6,$s3,6*$SIZE_T($sp) + br $ra +.align 16 +___ +$code.=<<___; +.Lctr32_km_loop: + la $s2,16($sp) + lgr $s3,$fp +.Lctr32_km_prepare: + stg $iv0,0($s2) + stg $ivp,8($s2) + la $s2,16($s2) + ahi $ivp,1 # 32-bit increment, preserves upper half + brct $s3,.Lctr32_km_prepare + + la $s0,16($sp) # inp + sllg $s1,$fp,4 # len + la $s2,16($sp) # out + .long 0xb92e00a8 # km %r10,%r8 + brc 1,.-4 # pay attention to "partial completion" + + la $s2,16($sp) + lgr $s3,$fp + slgr $s2,$inp +.Lctr32_km_xor: + lg $s0,0($inp) + lg $s1,8($inp) + xg $s0,0($s2,$inp) + xg $s1,8($s2,$inp) + stg $s0,0($out,$inp) + stg $s1,8($out,$inp) + la $inp,16($inp) + brct $s3,.Lctr32_km_xor + + slgr $len,$fp + brc 1,.Lctr32_km_loop # not zero, no borrow + algr $fp,$len + lghi $len,0 + brc 4+1,.Lctr32_km_loop # not zero + + l${g} $s0,0($sp) + l${g} $s1,$SIZE_T($sp) + la $s2,16($sp) +.Lctr32_km_zap: + stg $s0,0($s2) + stg $s0,8($s2) + la $s2,16($s2) + brct $s1,.Lctr32_km_zap + + la $sp,0($s0) + lm${g} %r6,$s3,6*$SIZE_T($sp) + br $ra +.align 16 +.Lctr32_software: +___ +$code.=<<___; + stm${g} $key,$ra,5*$SIZE_T($sp) + sl${g}r $inp,$out + larl $tbl,AES_Te + llgf $t1,12($ivp) + +.Lctr32_loop: + stm${g} $inp,$out,2*$SIZE_T($sp) + llgf $s0,0($ivp) + llgf $s1,4($ivp) + llgf $s2,8($ivp) + lgr $s3,$t1 + st $t1,16*$SIZE_T($sp) + lgr %r4,$key + + bras $ra,_s390x_AES_encrypt + + lm${g} $inp,$ivp,2*$SIZE_T($sp) + llgf $t1,16*$SIZE_T($sp) + x $s0,0($inp,$out) + x $s1,4($inp,$out) + x $s2,8($inp,$out) + x $s3,12($inp,$out) + stm $s0,$s3,0($out) + + la $out,16($out) + ahi $t1,1 # 32-bit increment + brct $len,.Lctr32_loop + + lm${g} %r6,$ra,6*$SIZE_T($sp) + br $ra +.size AES_ctr32_encrypt,.-AES_ctr32_encrypt +___ +} + +######################################################################## +# void AES_xts_encrypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +{ +my $inp="%r2"; +my $out="%r4"; # len and out are swapped +my $len="%r3"; +my $key1="%r5"; # $i1 +my $key2="%r6"; # $i2 +my $fp="%r7"; # $i3 +my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... + +$code.=<<___; +.type _s390x_xts_km,\@function +.align 16 +_s390x_xts_km: +___ +$code.=<<___ if(1); + llgfr $s0,%r0 # put aside the function code + lghi $s1,0x7f + nr $s1,%r0 + lghi %r0,0 # query capability vector + la %r1,$tweak-16($sp) + .long 0xb92e0042 # km %r4,%r2 + llihh %r1,0x8000 + srlg %r1,%r1,32($s1) # check for 32+function code + ng %r1,$tweak-16($sp) + lgr %r0,$s0 # restore the function code + la %r1,0($key1) # restore $key1 + jz .Lxts_km_vanilla + + lmg $i2,$i3,$tweak($sp) # put aside the tweak value + algr $out,$inp + + oill %r0,32 # switch to xts function code + aghi $s1,-18 # + sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 + la %r1,$tweak-16($sp) + slgr %r1,$s1 # parameter block position + lmg $s0,$s3,0($key1) # load 256 bits of key material, + stmg $s0,$s3,0(%r1) # and copy it to parameter block. + # yes, it contains junk and overlaps + # with the tweak in 128-bit case. + # it's done to avoid conditional + # branch. + stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value + + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + lrvg $s0,$tweak+0($sp) # load the last tweak + lrvg $s1,$tweak+8($sp) + stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key + + nill %r0,0xffdf # switch back to original function code + la %r1,0($key1) # restore pointer to $key1 + slgr $out,$inp + + llgc $len,2*$SIZE_T-1($sp) + nill $len,0x0f # $len%=16 + br $ra + +.align 16 +.Lxts_km_vanilla: +___ +$code.=<<___; + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer + lghi $s1,-4096 + algr $s0,$sp + lgr $fp,$sp + ngr $s0,$s1 # align at page boundary + slgr $fp,$s0 # total buffer size + lgr $s2,$sp + lghi $s1,1024+16 # sl[g]fi is extended-immediate facility + slgr $fp,$s1 # deduct reservation to get usable buffer size + # buffer size is at lest 256 and at most 3072+256-16 + + la $sp,1024($s0) # alloca + nill $fp,0xfff0 # round to 16*n + st${g} $s2,0($sp) # back-chain + nill $len,0xfff0 # redundant + st${g} $fp,$SIZE_T($sp) + + slgr $len,$fp + brc 1,.Lxts_km_go # not zero, no borrow + algr $fp,$len # input is shorter than allocated buffer + lghi $len,0 + st${g} $fp,$SIZE_T($sp) + +.Lxts_km_go: + lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian + lrvg $s1,$tweak+8($s2) + + la $s2,16($sp) # vector of ascending tweak values + slgr $s2,$inp + srlg $s3,$fp,4 + j .Lxts_km_start + +.Lxts_km_loop: + la $s2,16($sp) + slgr $s2,$inp + srlg $s3,$fp,4 +.Lxts_km_prepare: + lghi $i1,0x87 + srag $i2,$s1,63 # broadcast upper bit + ngr $i1,$i2 # rem + algr $s0,$s0 + alcgr $s1,$s1 + xgr $s0,$i1 +.Lxts_km_start: + lrvgr $i1,$s0 # flip byte order + lrvgr $i2,$s1 + stg $i1,0($s2,$inp) + stg $i2,8($s2,$inp) + xg $i1,0($inp) + xg $i2,8($inp) + stg $i1,0($out,$inp) + stg $i2,8($out,$inp) + la $inp,16($inp) + brct $s3,.Lxts_km_prepare + + slgr $inp,$fp # rewind $inp + la $s2,0($out,$inp) + lgr $s3,$fp + .long 0xb92e00aa # km $s2,$s2 + brc 1,.-4 # pay attention to "partial completion" + + la $s2,16($sp) + slgr $s2,$inp + srlg $s3,$fp,4 +.Lxts_km_xor: + lg $i1,0($out,$inp) + lg $i2,8($out,$inp) + xg $i1,0($s2,$inp) + xg $i2,8($s2,$inp) + stg $i1,0($out,$inp) + stg $i2,8($out,$inp) + la $inp,16($inp) + brct $s3,.Lxts_km_xor + + slgr $len,$fp + brc 1,.Lxts_km_loop # not zero, no borrow + algr $fp,$len + lghi $len,0 + brc 4+1,.Lxts_km_loop # not zero + + l${g} $i1,0($sp) # back-chain + llgf $fp,`2*$SIZE_T-4`($sp) # bytes used + la $i2,16($sp) + srlg $fp,$fp,4 +.Lxts_km_zap: + stg $i1,0($i2) + stg $i1,8($i2) + la $i2,16($i2) + brct $fp,.Lxts_km_zap + + la $sp,0($i1) + llgc $len,2*$SIZE_T-1($i1) + nill $len,0x0f # $len%=16 + bzr $ra + + # generate one more tweak... + lghi $i1,0x87 + srag $i2,$s1,63 # broadcast upper bit + ngr $i1,$i2 # rem + algr $s0,$s0 + alcgr $s1,$s1 + xgr $s0,$i1 + + ltr $len,$len # clear zero flag + br $ra +.size _s390x_xts_km,.-_s390x_xts_km + +.globl AES_xts_encrypt +.type AES_xts_encrypt,\@function +.align 16 +AES_xts_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, $out and $len + xgr %r4,%r3 + xgr %r3,%r4 +___ +$code.=<<___ if ($SIZE_T==4); + llgfr $len,$len +___ +$code.=<<___; + st${g} $len,1*$SIZE_T($sp) # save copy of $len + srag $len,$len,4 # formally wrong, because it expands + # sign byte, but who can afford asking + # to process more than 2^63-1 bytes? + # I use it, because it sets condition + # code... + bcr 8,$ra # abort if zero (i.e. less than 16) +___ +$code.=<<___ if (!$softonly); + llgf %r0,240($key2) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_enc_software + + st${g} $ra,5*$SIZE_T($sp) + stm${g} %r6,$s3,6*$SIZE_T($sp) + + sllg $len,$len,4 # $len&=~15 + slgr $out,$inp + + # generate the tweak value + l${g} $s3,$stdframe($sp) # pointer to iv + la $s2,$tweak($sp) + lmg $s0,$s1,0($s3) + lghi $s3,16 + stmg $s0,$s1,0($s2) + la %r1,0($key2) # $key2 is not needed anymore + .long 0xb92e00aa # km $s2,$s2, generate the tweak + brc 1,.-4 # can this happen? + + l %r0,240($key1) + la %r1,0($key1) # $key1 is not needed anymore + bras $ra,_s390x_xts_km + jz .Lxts_enc_km_done + + aghi $inp,-16 # take one step back + la $i3,0($out,$inp) # put aside real $out +.Lxts_enc_km_steal: + llgc $i1,16($inp) + llgc $i2,0($out,$inp) + stc $i1,0($out,$inp) + stc $i2,16($out,$inp) + la $inp,1($inp) + brct $len,.Lxts_enc_km_steal + + la $s2,0($i3) + lghi $s3,16 + lrvgr $i1,$s0 # flip byte order + lrvgr $i2,$s1 + xg $i1,0($s2) + xg $i2,8($s2) + stg $i1,0($s2) + stg $i2,8($s2) + .long 0xb92e00aa # km $s2,$s2 + brc 1,.-4 # can this happen? + lrvgr $i1,$s0 # flip byte order + lrvgr $i2,$s1 + xg $i1,0($i3) + xg $i2,8($i3) + stg $i1,0($i3) + stg $i2,8($i3) + +.Lxts_enc_km_done: + stg $sp,$tweak+0($sp) # wipe tweak + stg $sp,$tweak+8($sp) + l${g} $ra,5*$SIZE_T($sp) + lm${g} %r6,$s3,6*$SIZE_T($sp) + br $ra +.align 16 +.Lxts_enc_software: +___ +$code.=<<___; + stm${g} %r6,$ra,6*$SIZE_T($sp) + + slgr $out,$inp + + l${g} $s3,$stdframe($sp) # ivp + llgf $s0,0($s3) # load iv + llgf $s1,4($s3) + llgf $s2,8($s3) + llgf $s3,12($s3) + stm${g} %r2,%r5,2*$SIZE_T($sp) + la $key,0($key2) + larl $tbl,AES_Te + bras $ra,_s390x_AES_encrypt # generate the tweak + lm${g} %r2,%r5,2*$SIZE_T($sp) + stm $s0,$s3,$tweak($sp) # save the tweak + j .Lxts_enc_enter + +.align 16 +.Lxts_enc_loop: + lrvg $s1,$tweak+0($sp) # load the tweak in little-endian + lrvg $s3,$tweak+8($sp) + lghi %r1,0x87 + srag %r0,$s3,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr $s1,$s1 + alcgr $s3,$s3 + xgr $s1,%r1 + lrvgr $s1,$s1 # flip byte order + lrvgr $s3,$s3 + srlg $s0,$s1,32 # smash the tweak to 4x32-bits + stg $s1,$tweak+0($sp) # save the tweak + llgfr $s1,$s1 + srlg $s2,$s3,32 + stg $s3,$tweak+8($sp) + llgfr $s3,$s3 + la $inp,16($inp) # $inp+=16 +.Lxts_enc_enter: + x $s0,0($inp) # ^=*($inp) + x $s1,4($inp) + x $s2,8($inp) + x $s3,12($inp) + stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing + la $key,0($key1) + bras $ra,_s390x_AES_encrypt + lm${g} %r2,%r5,2*$SIZE_T($sp) + x $s0,$tweak+0($sp) # ^=tweak + x $s1,$tweak+4($sp) + x $s2,$tweak+8($sp) + x $s3,$tweak+12($sp) + st $s0,0($out,$inp) + st $s1,4($out,$inp) + st $s2,8($out,$inp) + st $s3,12($out,$inp) + brct${g} $len,.Lxts_enc_loop + + llgc $len,`2*$SIZE_T-1`($sp) + nill $len,0x0f # $len%16 + jz .Lxts_enc_done + + la $i3,0($inp,$out) # put aside real $out +.Lxts_enc_steal: + llgc %r0,16($inp) + llgc %r1,0($out,$inp) + stc %r0,0($out,$inp) + stc %r1,16($out,$inp) + la $inp,1($inp) + brct $len,.Lxts_enc_steal + la $out,0($i3) # restore real $out + + # generate last tweak... + lrvg $s1,$tweak+0($sp) # load the tweak in little-endian + lrvg $s3,$tweak+8($sp) + lghi %r1,0x87 + srag %r0,$s3,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr $s1,$s1 + alcgr $s3,$s3 + xgr $s1,%r1 + lrvgr $s1,$s1 # flip byte order + lrvgr $s3,$s3 + srlg $s0,$s1,32 # smash the tweak to 4x32-bits + stg $s1,$tweak+0($sp) # save the tweak + llgfr $s1,$s1 + srlg $s2,$s3,32 + stg $s3,$tweak+8($sp) + llgfr $s3,$s3 + + x $s0,0($out) # ^=*(inp)|stolen cipther-text + x $s1,4($out) + x $s2,8($out) + x $s3,12($out) + st${g} $out,4*$SIZE_T($sp) + la $key,0($key1) + bras $ra,_s390x_AES_encrypt + l${g} $out,4*$SIZE_T($sp) + x $s0,`$tweak+0`($sp) # ^=tweak + x $s1,`$tweak+4`($sp) + x $s2,`$tweak+8`($sp) + x $s3,`$tweak+12`($sp) + st $s0,0($out) + st $s1,4($out) + st $s2,8($out) + st $s3,12($out) + +.Lxts_enc_done: + stg $sp,$tweak+0($sp) # wipe tweak + stg $sp,$twesk+8($sp) + lm${g} %r6,$ra,6*$SIZE_T($sp) + br $ra +.size AES_xts_encrypt,.-AES_xts_encrypt +___ +# void AES_xts_decrypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +$code.=<<___; +.globl AES_xts_decrypt +.type AES_xts_decrypt,\@function +.align 16 +AES_xts_decrypt: + xgr %r3,%r4 # flip %r3 and %r4, $out and $len + xgr %r4,%r3 + xgr %r3,%r4 +___ +$code.=<<___ if ($SIZE_T==4); + llgfr $len,$len +___ +$code.=<<___; + st${g} $len,1*$SIZE_T($sp) # save copy of $len + aghi $len,-16 + bcr 4,$ra # abort if less than zero. formally + # wrong, because $len is unsigned, + # but who can afford asking to + # process more than 2^63-1 bytes? + tmll $len,0x0f + jnz .Lxts_dec_proceed + aghi $len,16 +.Lxts_dec_proceed: +___ +$code.=<<___ if (!$softonly); + llgf %r0,240($key2) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_dec_software + + st${g} $ra,5*$SIZE_T($sp) + stm${g} %r6,$s3,6*$SIZE_T($sp) + + nill $len,0xfff0 # $len&=~15 + slgr $out,$inp + + # generate the tweak value + l${g} $s3,$stdframe($sp) # pointer to iv + la $s2,$tweak($sp) + lmg $s0,$s1,0($s3) + lghi $s3,16 + stmg $s0,$s1,0($s2) + la %r1,0($key2) # $key2 is not needed past this point + .long 0xb92e00aa # km $s2,$s2, generate the tweak + brc 1,.-4 # can this happen? + + l %r0,240($key1) + la %r1,0($key1) # $key1 is not needed anymore + + ltgr $len,$len + jz .Lxts_dec_km_short + bras $ra,_s390x_xts_km + jz .Lxts_dec_km_done + + lrvgr $s2,$s0 # make copy in reverse byte order + lrvgr $s3,$s1 + j .Lxts_dec_km_2ndtweak + +.Lxts_dec_km_short: + llgc $len,`2*$SIZE_T-1`($sp) + nill $len,0x0f # $len%=16 + lrvg $s0,$tweak+0($sp) # load the tweak + lrvg $s1,$tweak+8($sp) + lrvgr $s2,$s0 # make copy in reverse byte order + lrvgr $s3,$s1 + +.Lxts_dec_km_2ndtweak: + lghi $i1,0x87 + srag $i2,$s1,63 # broadcast upper bit + ngr $i1,$i2 # rem + algr $s0,$s0 + alcgr $s1,$s1 + xgr $s0,$i1 + lrvgr $i1,$s0 # flip byte order + lrvgr $i2,$s1 + + xg $i1,0($inp) + xg $i2,8($inp) + stg $i1,0($out,$inp) + stg $i2,8($out,$inp) + la $i2,0($out,$inp) + lghi $i3,16 + .long 0xb92e0066 # km $i2,$i2 + brc 1,.-4 # can this happen? + lrvgr $i1,$s0 + lrvgr $i2,$s1 + xg $i1,0($out,$inp) + xg $i2,8($out,$inp) + stg $i1,0($out,$inp) + stg $i2,8($out,$inp) + + la $i3,0($out,$inp) # put aside real $out +.Lxts_dec_km_steal: + llgc $i1,16($inp) + llgc $i2,0($out,$inp) + stc $i1,0($out,$inp) + stc $i2,16($out,$inp) + la $inp,1($inp) + brct $len,.Lxts_dec_km_steal + + lgr $s0,$s2 + lgr $s1,$s3 + xg $s0,0($i3) + xg $s1,8($i3) + stg $s0,0($i3) + stg $s1,8($i3) + la $s0,0($i3) + lghi $s1,16 + .long 0xb92e0088 # km $s0,$s0 + brc 1,.-4 # can this happen? + xg $s2,0($i3) + xg $s3,8($i3) + stg $s2,0($i3) + stg $s3,8($i3) +.Lxts_dec_km_done: + stg $sp,$tweak+0($sp) # wipe tweak + stg $sp,$tweak+8($sp) + l${g} $ra,5*$SIZE_T($sp) + lm${g} %r6,$s3,6*$SIZE_T($sp) + br $ra +.align 16 +.Lxts_dec_software: +___ +$code.=<<___; + stm${g} %r6,$ra,6*$SIZE_T($sp) + + srlg $len,$len,4 + slgr $out,$inp + + l${g} $s3,$stdframe($sp) # ivp + llgf $s0,0($s3) # load iv + llgf $s1,4($s3) + llgf $s2,8($s3) + llgf $s3,12($s3) + stm${g} %r2,%r5,2*$SIZE_T($sp) + la $key,0($key2) + larl $tbl,AES_Te + bras $ra,_s390x_AES_encrypt # generate the tweak + lm${g} %r2,%r5,2*$SIZE_T($sp) + larl $tbl,AES_Td + lt${g}r $len,$len + stm $s0,$s3,$tweak($sp) # save the tweak + jz .Lxts_dec_short + j .Lxts_dec_enter + +.align 16 +.Lxts_dec_loop: + lrvg $s1,$tweak+0($sp) # load the tweak in little-endian + lrvg $s3,$tweak+8($sp) + lghi %r1,0x87 + srag %r0,$s3,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr $s1,$s1 + alcgr $s3,$s3 + xgr $s1,%r1 + lrvgr $s1,$s1 # flip byte order + lrvgr $s3,$s3 + srlg $s0,$s1,32 # smash the tweak to 4x32-bits + stg $s1,$tweak+0($sp) # save the tweak + llgfr $s1,$s1 + srlg $s2,$s3,32 + stg $s3,$tweak+8($sp) + llgfr $s3,$s3 +.Lxts_dec_enter: + x $s0,0($inp) # tweak^=*(inp) + x $s1,4($inp) + x $s2,8($inp) + x $s3,12($inp) + stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing + la $key,0($key1) + bras $ra,_s390x_AES_decrypt + lm${g} %r2,%r5,2*$SIZE_T($sp) + x $s0,$tweak+0($sp) # ^=tweak + x $s1,$tweak+4($sp) + x $s2,$tweak+8($sp) + x $s3,$tweak+12($sp) + st $s0,0($out,$inp) + st $s1,4($out,$inp) + st $s2,8($out,$inp) + st $s3,12($out,$inp) + la $inp,16($inp) + brct${g} $len,.Lxts_dec_loop + + llgc $len,`2*$SIZE_T-1`($sp) + nill $len,0x0f # $len%16 + jz .Lxts_dec_done + + # generate pair of tweaks... + lrvg $s1,$tweak+0($sp) # load the tweak in little-endian + lrvg $s3,$tweak+8($sp) + lghi %r1,0x87 + srag %r0,$s3,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr $s1,$s1 + alcgr $s3,$s3 + xgr $s1,%r1 + lrvgr $i2,$s1 # flip byte order + lrvgr $i3,$s3 + stmg $i2,$i3,$tweak($sp) # save the 1st tweak + j .Lxts_dec_2ndtweak + +.align 16 +.Lxts_dec_short: + llgc $len,`2*$SIZE_T-1`($sp) + nill $len,0x0f # $len%16 + lrvg $s1,$tweak+0($sp) # load the tweak in little-endian + lrvg $s3,$tweak+8($sp) +.Lxts_dec_2ndtweak: + lghi %r1,0x87 + srag %r0,$s3,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr $s1,$s1 + alcgr $s3,$s3 + xgr $s1,%r1 + lrvgr $s1,$s1 # flip byte order + lrvgr $s3,$s3 + srlg $s0,$s1,32 # smash the tweak to 4x32-bits + stg $s1,$tweak-16+0($sp) # save the 2nd tweak + llgfr $s1,$s1 + srlg $s2,$s3,32 + stg $s3,$tweak-16+8($sp) + llgfr $s3,$s3 + + x $s0,0($inp) # tweak_the_2nd^=*(inp) + x $s1,4($inp) + x $s2,8($inp) + x $s3,12($inp) + stm${g} %r2,%r3,2*$SIZE_T($sp) + la $key,0($key1) + bras $ra,_s390x_AES_decrypt + lm${g} %r2,%r5,2*$SIZE_T($sp) + x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd + x $s1,$tweak-16+4($sp) + x $s2,$tweak-16+8($sp) + x $s3,$tweak-16+12($sp) + st $s0,0($out,$inp) + st $s1,4($out,$inp) + st $s2,8($out,$inp) + st $s3,12($out,$inp) + + la $i3,0($out,$inp) # put aside real $out +.Lxts_dec_steal: + llgc %r0,16($inp) + llgc %r1,0($out,$inp) + stc %r0,0($out,$inp) + stc %r1,16($out,$inp) + la $inp,1($inp) + brct $len,.Lxts_dec_steal + la $out,0($i3) # restore real $out + + lm $s0,$s3,$tweak($sp) # load the 1st tweak + x $s0,0($out) # tweak^=*(inp)|stolen cipher-text + x $s1,4($out) + x $s2,8($out) + x $s3,12($out) + st${g} $out,4*$SIZE_T($sp) + la $key,0($key1) + bras $ra,_s390x_AES_decrypt + l${g} $out,4*$SIZE_T($sp) + x $s0,$tweak+0($sp) # ^=tweak + x $s1,$tweak+4($sp) + x $s2,$tweak+8($sp) + x $s3,$tweak+12($sp) + st $s0,0($out) + st $s1,4($out) + st $s2,8($out) + st $s3,12($out) + stg $sp,$tweak-16+0($sp) # wipe 2nd tweak + stg $sp,$tweak-16+8($sp) +.Lxts_dec_done: + stg $sp,$tweak+0($sp) # wipe tweak + stg $sp,$twesk+8($sp) + lm${g} %r6,$ra,6*$SIZE_T($sp) + br $ra +.size AES_xts_decrypt,.-AES_xts_decrypt ___ } $code.=<<___; .string "AES for s390x, CRYPTOGAMS by " +.comm OPENSSL_s390xcap_P,16,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; +close STDOUT; # force flush diff --git a/app/openssl/crypto/aes/asm/aes-sparcv9.pl b/app/openssl/crypto/aes/asm/aes-sparcv9.pl index c57b3a2d..403c4d12 100755 --- a/app/openssl/crypto/aes/asm/aes-sparcv9.pl +++ b/app/openssl/crypto/aes/asm/aes-sparcv9.pl @@ -1176,6 +1176,7 @@ ___ # As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have # undesired effect, so just omit them and sacrifice some portion of # percent in performance... -$code =~ s/fmovs.*$//gem; +$code =~ s/fmovs.*$//gm; print $code; +close STDOUT; # ensure flush diff --git a/app/openssl/crypto/aes/asm/aes-x86_64.S b/app/openssl/crypto/aes/asm/aes-x86_64.S new file mode 100644 index 00000000..e385566f --- /dev/null +++ b/app/openssl/crypto/aes/asm/aes-x86_64.S @@ -0,0 +1,2541 @@ +.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %dh,%ebp + movzbl %ah,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %dh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ah,%edi + shrl $8,%ecx + shrl $8,%ebx + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + shll $16,%r9d + shll $16,%r13d + shll $16,%ebp + xorl %r9d,%r11d + xorl %r13d,%r12d + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%edi + shll $24,%edx + xorl %esi,%r10d + shll $24,%ecx + xorl %edi,%r11d + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl %eax,%esi + movl %ebx,%edi + andl $2155905152,%esi + andl $2155905152,%edi + movl %esi,%r10d + movl %edi,%r11d + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl %ecx,%esi + movl %edx,%edi + roll $24,%eax + roll $24,%ebx + andl $2155905152,%esi + andl $2155905152,%edi + xorl %r8d,%eax + xorl %r9d,%ebx + movl %esi,%r12d + movl %edi,%ebp + rorl $16,%r10d + rorl $16,%r11d + shrl $7,%r12d + leal (%rcx,%rcx,1),%r8d + xorl %r10d,%eax + xorl %r11d,%ebx + shrl $7,%ebp + leal (%rdx,%rdx,1),%r9d + rorl $8,%r10d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%ecx + xorl %r9d,%edx + roll $24,%ecx + roll $24,%edx + xorl %r8d,%ecx + xorl %r9d,%edx + movl 0(%r14),%esi + rorl $16,%r12d + rorl $16,%ebp + movl 64(%r14),%edi + xorl %r12d,%ecx + xorl %ebp,%edx + movl 128(%r14),%r8d + rorl $8,%r12d + rorl $8,%ebp + movl 192(%r14),%r9d + xorl %r12d,%ecx + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: +AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.size AES_encrypt,.-AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %bh,%ebp + movzbl %ch,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %bh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ch,%edi + shll $16,%r9d + shll $16,%r13d + movzbl (%r14,%rdi,1),%ebx + xorl %r9d,%r11d + xorl %r13d,%r12d + + movzbl %dh,%edi + shrl $8,%eax + shll $16,%ebp + movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rax,1),%edx + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%r10d + shll $24,%edx + xorl %r11d,%ebx + movl %r10d,%eax + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq 0(%r14),%rsi + shrq $32,%r8 + shrq $32,%r11 + movq 64(%r14),%rdi + roll $16,%r9d + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + roll $16,%r11d + movq 192(%r14),%r10 + xorl %r9d,%eax + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: +AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.size AES_decrypt,.-AES_decrypt +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function +.align 16 +private_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Lenc_key_epilogue: + .byte 0xf3,0xc3 +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.align 4 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.align 4 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.align 4 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: +.byte 0xf3,0xc3 +.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function +.align 16 +private_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.align 4 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.align 4 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + + shrq $32,%r8 + shrq $32,%r11 + + roll $16,%r9d + roll $16,%r12d + + roll $16,%r8d + roll $16,%r11d + + xorl %r9d,%eax + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax +.Labort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Ldec_key_epilogue: + .byte 0xf3,0xc3 +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 + +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: +AES_cbc_encrypt: + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + cmpq $0,%r9 + jne .Lcbc_picked_te + leaq .LAES_Td(%rip),%r14 +.Lcbc_picked_te: + + movl OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $28,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.align 4 +.Lcbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.align 4 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.align 4 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.align 4 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.align 16 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.align 4 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.align 16 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.align 4 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop +.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.align 4 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.align 16 +.Lcbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.align 4 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.align 16 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.align 4 +.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.align 16 +.Lcbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lcbc_popfq: + popfq +.Lcbc_epilogue: + .byte 0xf3,0xc3 +.size AES_cbc_encrypt,.-AES_cbc_encrypt +.align 64 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.align 64 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/app/openssl/crypto/aes/asm/aes-x86_64.pl b/app/openssl/crypto/aes/asm/aes-x86_64.pl index a545e892..34cbb5d8 100755 --- a/app/openssl/crypto/aes/asm/aes-x86_64.pl +++ b/app/openssl/crypto/aes/asm/aes-x86_64.pl @@ -36,7 +36,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $verticalspin=1; # unlike 32-bit version $verticalspin performs # ~15% better on both AMD and Intel cores @@ -588,6 +589,9 @@ $code.=<<___; .globl AES_encrypt .type AES_encrypt,\@function,3 .align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: AES_encrypt: push %rbx push %rbp @@ -1184,6 +1188,9 @@ $code.=<<___; .globl AES_decrypt .type AES_decrypt,\@function,3 .align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: AES_decrypt: push %rbx push %rbp @@ -1277,13 +1284,13 @@ $code.=<<___; ___ } -# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, +# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,\@function,3 +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,\@function,3 .align 16 -AES_set_encrypt_key: +private_AES_set_encrypt_key: push %rbx push %rbp push %r12 # redundant, but allows to share @@ -1304,7 +1311,7 @@ AES_set_encrypt_key: add \$56,%rsp .Lenc_key_epilogue: ret -.size AES_set_encrypt_key,.-AES_set_encrypt_key +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 @@ -1547,13 +1554,13 @@ $code.=<<___; ___ } -# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, +# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,\@function,3 +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,\@function,3 .align 16 -AES_set_decrypt_key: +private_AES_set_decrypt_key: push %rbx push %rbp push %r12 @@ -1622,7 +1629,7 @@ $code.=<<___; add \$56,%rsp .Ldec_key_epilogue: ret -.size AES_set_decrypt_key,.-AES_set_decrypt_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key ___ # void AES_cbc_encrypt (const void char *inp, unsigned char *out, @@ -1648,6 +1655,9 @@ $code.=<<___; .type AES_cbc_encrypt,\@function,6 .align 16 .extern OPENSSL_ia32cap_P +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: AES_cbc_encrypt: cmp \$0,%rdx # check length je .Lcbc_epilogue @@ -2766,13 +2776,13 @@ cbc_se_handler: .rva .LSEH_end_AES_decrypt .rva .LSEH_info_AES_decrypt - .rva .LSEH_begin_AES_set_encrypt_key - .rva .LSEH_end_AES_set_encrypt_key - .rva .LSEH_info_AES_set_encrypt_key + .rva .LSEH_begin_private_AES_set_encrypt_key + .rva .LSEH_end_private_AES_set_encrypt_key + .rva .LSEH_info_private_AES_set_encrypt_key - .rva .LSEH_begin_AES_set_decrypt_key - .rva .LSEH_end_AES_set_decrypt_key - .rva .LSEH_info_AES_set_decrypt_key + .rva .LSEH_begin_private_AES_set_decrypt_key + .rva .LSEH_end_private_AES_set_decrypt_key + .rva .LSEH_info_private_AES_set_decrypt_key .rva .LSEH_begin_AES_cbc_encrypt .rva .LSEH_end_AES_cbc_encrypt @@ -2788,11 +2798,11 @@ cbc_se_handler: .byte 9,0,0,0 .rva block_se_handler .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_AES_set_encrypt_key: +.LSEH_info_private_AES_set_encrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[] -.LSEH_info_AES_set_decrypt_key: +.LSEH_info_private_AES_set_decrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[] diff --git a/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.S b/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.S new file mode 100644 index 00000000..32fd600b --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.S @@ -0,0 +1,1396 @@ +.text + + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,@function +.align 16 +aesni_cbc_sha1_enc: + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movl OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc_ssse3,@function +.align 16 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast1 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast1 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast1: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast2 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast2 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast2: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb .Laesenclast3 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast3 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast3: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast4 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast4 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast4: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast5 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast5 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast5: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl b/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl new file mode 100644 index 00000000..3c8f6c19 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -0,0 +1,1250 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# June 2011 +# +# This is AESNI-CBC+SHA1 "stitch" implementation. The idea, as spelled +# in http://download.intel.com/design/intarch/papers/323686.pdf, is +# that since AESNI-CBC encrypt exhibit *very* low instruction-level +# parallelism, interleaving it with another algorithm would allow to +# utilize processor resources better and achieve better performance. +# SHA1 instruction sequences(*) are taken from sha1-x86_64.pl and +# AESNI code is weaved into it. Below are performance numbers in +# cycles per processed byte, less is better, for standalone AESNI-CBC +# encrypt, sum of the latter and standalone SHA1, and "stitched" +# subroutine: +# +# AES-128-CBC +SHA1 stitch gain +# Westmere 3.77[+5.6] 9.37 6.65 +41% +# Sandy Bridge 5.05[+5.2(6.3)] 10.25(11.35) 6.16(7.08) +67%(+60%) +# +# AES-192-CBC +# Westmere 4.51 10.11 6.97 +45% +# Sandy Bridge 6.05 11.25(12.35) 6.34(7.27) +77%(+70%) +# +# AES-256-CBC +# Westmere 5.25 10.85 7.25 +50% +# Sandy Bridge 7.05 12.25(13.35) 7.06(7.70) +74%(+73%) +# +# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for +# background information. Above numbers in parentheses are SSSE3 +# results collected on AVX-capable CPU, i.e. apply on OSes that +# don't support AVX. +# +# Needless to mention that it makes no sense to implement "stitched" +# *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1 +# fully utilize parallelism, so stitching would not give any gain +# anyway. Well, there might be some, e.g. because of better cache +# locality... For reference, here are performance results for +# standalone AESNI-CBC decrypt: +# +# AES-128-CBC AES-192-CBC AES-256-CBC +# Westmere 1.31 1.55 1.80 +# Sandy Bridge 0.93 1.06 1.22 + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/ && + $1>=2.19); +$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && + $1>=2.09); +$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./ && + $1>=10); + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# void aesni_cbc_sha1_enc(const void *inp, +# void *out, +# size_t length, +# const AES_KEY *key, +# unsigned char *iv, +# SHA_CTX *ctx, +# const void *in0); + +$code.=<<___; +.text +.extern OPENSSL_ia32cap_P + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,\@abi-omnipotent +.align 16 +aesni_cbc_sha1_enc: + # caller should check for SSSE3 and AES-NI bits + mov OPENSSL_ia32cap_P+0(%rip),%r10d + mov OPENSSL_ia32cap_P+4(%rip),%r11d +___ +$code.=<<___ if ($avx); + and \$`1<<28`,%r11d # mask AVX bit + and \$`1<<30`,%r10d # mask "Intel CPU" bit + or %r11d,%r10d + cmp \$`1<<28|1<<30`,%r10d + je aesni_cbc_sha1_enc_avx +___ +$code.=<<___; + jmp aesni_cbc_sha1_enc_ssse3 + ret +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +___ + +my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +my $Xi=4; +my @X=map("%xmm$_",(4..7,0..3)); +my @Tx=map("%xmm$_",(8..10)); +my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization +my @T=("%esi","%edi"); +my $j=0; my $jj=0; my $r=0; my $sn=0; +my $K_XX_XX="%r11"; +my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13)); +my @rndkey=("%xmm14","%xmm15"); + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +my $_rol=sub { &rol(@_) }; +my $_ror=sub { &ror(@_) }; + +$code.=<<___; +.type aesni_cbc_sha1_enc_ssse3,\@function,6 +.align 16 +aesni_cbc_sha1_enc_ssse3: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument + #shr \$6,$len # debugging artefact + #jz .Lepilogue_ssse3 # debugging artefact + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + lea `-104-($win64?10*16:0)`(%rsp),%rsp + #mov $in0,$inp # debugging artefact + #lea 64(%rsp),$ctx # debugging artefact +___ +$code.=<<___ if ($win64); + movaps %xmm6,96+0(%rsp) + movaps %xmm7,96+16(%rsp) + movaps %xmm8,96+32(%rsp) + movaps %xmm9,96+48(%rsp) + movaps %xmm10,96+64(%rsp) + movaps %xmm11,96+80(%rsp) + movaps %xmm12,96+96(%rsp) + movaps %xmm13,96+112(%rsp) + movaps %xmm14,96+128(%rsp) + movaps %xmm15,96+144(%rsp) +.Lprologue_ssse3: +___ +$code.=<<___; + mov $in0,%r12 # reassign arguments + mov $out,%r13 + mov $len,%r14 + mov $key,%r15 + movdqu ($ivp),$iv # load IV + mov $ivp,88(%rsp) # save $ivp +___ +my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +my $rounds="${ivp}d"; +$code.=<<___; + shl \$6,$len + sub $in0,$out + mov 240($key),$rounds + add $inp,$len # end of input + + lea K_XX_XX(%rip),$K_XX_XX + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + + movdqa 64($K_XX_XX),@X[2] # pbswap mask + movdqa 0($K_XX_XX),@Tx[1] # K_00_19 + movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + movdqu 16($inp),@X[-3&7] + movdqu 32($inp),@X[-2&7] + movdqu 48($inp),@X[-1&7] + pshufb @X[2],@X[-4&7] # byte swap + add \$64,$inp + pshufb @X[2],@X[-3&7] + pshufb @X[2],@X[-2&7] + pshufb @X[2],@X[-1&7] + paddd @Tx[1],@X[-4&7] # add K_00_19 + paddd @Tx[1],@X[-3&7] + paddd @Tx[1],@X[-2&7] + movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU + psubd @Tx[1],@X[-4&7] # restore X[] + movdqa @X[-3&7],16(%rsp) + psubd @Tx[1],@X[-3&7] + movdqa @X[-2&7],32(%rsp) + psubd @Tx[1],@X[-2&7] + movups ($key),$rndkey0 # $key[0] + movups 16($key),$rndkey[0] # forward reference + jmp .Loop_ssse3 +___ + +my $aesenc=sub { + use integer; + my ($n,$k)=($r/10,$r%10); + if ($k==0) { + $code.=<<___; + movups `16*$n`($in0),$in # load input + xorps $rndkey0,$in +___ + $code.=<<___ if ($n); + movups $iv,`16*($n-1)`($out,$in0) # write output +___ + $code.=<<___; + xorps $in,$iv + aesenc $rndkey[0],$iv + movups `32+16*$k`($key),$rndkey[1] +___ + } elsif ($k==9) { + $sn++; + $code.=<<___; + cmp \$11,$rounds + jb .Laesenclast$sn + movups `32+16*($k+0)`($key),$rndkey[1] + aesenc $rndkey[0],$iv + movups `32+16*($k+1)`($key),$rndkey[0] + aesenc $rndkey[1],$iv + je .Laesenclast$sn + movups `32+16*($k+2)`($key),$rndkey[1] + aesenc $rndkey[0],$iv + movups `32+16*($k+3)`($key),$rndkey[0] + aesenc $rndkey[1],$iv +.Laesenclast$sn: + aesenclast $rndkey[0],$iv + movups 16($key),$rndkey[1] # forward reference +___ + } else { + $code.=<<___; + aesenc $rndkey[0],$iv + movups `32+16*$k`($key),$rndkey[1] +___ + } + $r++; unshift(@rndkey,pop(@rndkey)); +}; + +sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + &movdqa (@X[0],@X[-3&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[0],@X[-1&7]); + &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &psrldq (@Tx[0],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (@Tx[2],@X[0]); + &movdqa (@Tx[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword + &paddd (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@Tx[0],31); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[1],@Tx[2]); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@Tx[2],30); + &por (@X[0],@Tx[0]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pslld (@Tx[1],2); + &pxor (@X[0],@Tx[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xupdate_ssse3_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); + eval(shift(@insns)); # body_20_39 + &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + if ($Xi%5) { + &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... + } else { # ... or load next one + &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); + } + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &movdqa (@Tx[0],@X[0]); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pslld (@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &psrld (@Tx[0],30); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &por (@X[0],@Tx[0]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &movdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xuplast_ssse3_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &cmp ($inp,$len); + &je (".Ldone_ssse3"); + + unshift(@Tx,pop(@Tx)); + + &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask + &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 + &movdqu (@X[-4&7],"0($inp)"); # load input + &movdqu (@X[-3&7],"16($inp)"); + &movdqu (@X[-2&7],"32($inp)"); + &movdqu (@X[-1&7],"48($inp)"); + &pshufb (@X[-4&7],@X[2]); # byte swap + &add ($inp,64); + + $Xi=0; +} + +sub Xloop_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &pshufb (@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[($Xi-4)&7],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + &psubd (@X[($Xi-4)&7],@Tx[1]); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +sub body_00_19 () { + use integer; + my ($k,$n); + my @r=( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer + '&xor ($c,$d);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&xor ($c,$d);', # restore $c + '&xor (@T[0],$d);', + '&add ($e,$a);', + '&$_ror ($b,$j?7:2);', # $b>>>2 + '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); + $n = scalar(@r); + $k = (($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds + @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); + $jj++; + return @r; +} + +sub body_20_39 () { + use integer; + my ($k,$n); + my @r=( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer + '&xor (@T[0],$d);', # ($b^$d) + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&xor (@T[0],$c);', # ($b^$d^$c) + '&add ($e,$a);', + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); + $n = scalar(@r); + $k = (($jj+1)*8/20)*20*$n/8; # 8 aesencs per these 20 rounds + @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); + $jj++; + return @r; +} + +sub body_40_59 () { + use integer; + my ($k,$n); + my @r=( + '($a,$b,$c,$d,$e)=@V;'. + '&mov (@T[1],$c);', + '&xor ($c,$d);', + '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer + '&and (@T[1],$d);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[1]);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&add ($e,@T[0]);', + '&xor ($c,$d);', # restore $c + '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); + $n = scalar(@r); + $k=(($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds + @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); + $jj++; + return @r; +} +$code.=<<___; +.align 16 +.Loop_ssse3: +___ + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + $saved_r=$r; @saved_rndkey=@rndkey; + + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + +$code.=<<___; + movups $iv,48($out,$in0) # write output + lea 64($in0),$in0 + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic seed + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: +___ + $jj=$j=$saved_j; @V=@saved_V; + $r=$saved_r; @rndkey=@saved_rndkey; + + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + +$code.=<<___; + movups $iv,48($out,$in0) # write output + mov 88(%rsp),$ivp # restore $ivp + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + movups $iv,($ivp) # write IV +___ +$code.=<<___ if ($win64); + movaps 96+0(%rsp),%xmm6 + movaps 96+16(%rsp),%xmm7 + movaps 96+32(%rsp),%xmm8 + movaps 96+48(%rsp),%xmm9 + movaps 96+64(%rsp),%xmm10 + movaps 96+80(%rsp),%xmm11 + movaps 96+96(%rsp),%xmm12 + movaps 96+112(%rsp),%xmm13 + movaps 96+128(%rsp),%xmm14 + movaps 96+144(%rsp),%xmm15 +___ +$code.=<<___; + lea `104+($win64?10*16:0)`(%rsp),%rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_ssse3: + ret +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +___ + +$j=$jj=$r=$sn=0; + +if ($avx) { +my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +my $Xi=4; +my @X=map("%xmm$_",(4..7,0..3)); +my @Tx=map("%xmm$_",(8..10)); +my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization +my @T=("%esi","%edi"); + +my $_rol=sub { &shld(@_[0],@_) }; +my $_ror=sub { &shrd(@_[0],@_) }; + +$code.=<<___; +.type aesni_cbc_sha1_enc_avx,\@function,6 +.align 16 +aesni_cbc_sha1_enc_avx: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument + #shr \$6,$len # debugging artefact + #jz .Lepilogue_avx # debugging artefact + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + lea `-104-($win64?10*16:0)`(%rsp),%rsp + #mov $in0,$inp # debugging artefact + #lea 64(%rsp),$ctx # debugging artefact +___ +$code.=<<___ if ($win64); + movaps %xmm6,96+0(%rsp) + movaps %xmm7,96+16(%rsp) + movaps %xmm8,96+32(%rsp) + movaps %xmm9,96+48(%rsp) + movaps %xmm10,96+64(%rsp) + movaps %xmm11,96+80(%rsp) + movaps %xmm12,96+96(%rsp) + movaps %xmm13,96+112(%rsp) + movaps %xmm14,96+128(%rsp) + movaps %xmm15,96+144(%rsp) +.Lprologue_avx: +___ +$code.=<<___; + vzeroall + mov $in0,%r12 # reassign arguments + mov $out,%r13 + mov $len,%r14 + mov $key,%r15 + vmovdqu ($ivp),$iv # load IV + mov $ivp,88(%rsp) # save $ivp +___ +my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +my $rounds="${ivp}d"; +$code.=<<___; + shl \$6,$len + sub $in0,$out + mov 240($key),$rounds + add \$112,$key # size optimization + add $inp,$len # end of input + + lea K_XX_XX(%rip),$K_XX_XX + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + + vmovdqa 64($K_XX_XX),@X[2] # pbswap mask + vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 + vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + vmovdqu 16($inp),@X[-3&7] + vmovdqu 32($inp),@X[-2&7] + vmovdqu 48($inp),@X[-1&7] + vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap + add \$64,$inp + vpshufb @X[2],@X[-3&7],@X[-3&7] + vpshufb @X[2],@X[-2&7],@X[-2&7] + vpshufb @X[2],@X[-1&7],@X[-1&7] + vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 + vpaddd @Tx[1],@X[-3&7],@X[1] + vpaddd @Tx[1],@X[-2&7],@X[2] + vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU + vmovdqa @X[1],16(%rsp) + vmovdqa @X[2],32(%rsp) + vmovups -112($key),$rndkey0 # $key[0] + vmovups 16-112($key),$rndkey[0] # forward reference + jmp .Loop_avx +___ + +my $aesenc=sub { + use integer; + my ($n,$k)=($r/10,$r%10); + if ($k==0) { + $code.=<<___; + vmovups `16*$n`($in0),$in # load input + vxorps $rndkey0,$in,$in +___ + $code.=<<___ if ($n); + vmovups $iv,`16*($n-1)`($out,$in0) # write output +___ + $code.=<<___; + vxorps $in,$iv,$iv + vaesenc $rndkey[0],$iv,$iv + vmovups `32+16*$k-112`($key),$rndkey[1] +___ + } elsif ($k==9) { + $sn++; + $code.=<<___; + cmp \$11,$rounds + jb .Lvaesenclast$sn + vaesenc $rndkey[0],$iv,$iv + vmovups `32+16*($k+0)-112`($key),$rndkey[1] + vaesenc $rndkey[1],$iv,$iv + vmovups `32+16*($k+1)-112`($key),$rndkey[0] + je .Lvaesenclast$sn + vaesenc $rndkey[0],$iv,$iv + vmovups `32+16*($k+2)-112`($key),$rndkey[1] + vaesenc $rndkey[1],$iv,$iv + vmovups `32+16*($k+3)-112`($key),$rndkey[0] +.Lvaesenclast$sn: + vaesenclast $rndkey[0],$iv,$iv + vmovups 16-112($key),$rndkey[1] # forward reference +___ + } else { + $code.=<<___; + vaesenc $rndkey[0],$iv,$iv + vmovups `32+16*$k-112`($key),$rndkey[1] +___ + } + $r++; unshift(@rndkey,pop(@rndkey)); +}; + +sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[0],@X[0],31); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword + &vpaddd (@X[0],@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[1],@Tx[2],30); + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslld (@Tx[2],@Tx[2],2); + &vpxor (@X[0],@X[0],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xupdate_avx_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + if ($Xi%5) { + &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... + } else { # ... or load next one + &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); + } + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpsrld (@Tx[0],@X[0],30); + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpslld (@X[0],@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xuplast_avx_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &cmp ($inp,$len); + &je (".Ldone_avx"); + + unshift(@Tx,pop(@Tx)); + + &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask + &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 + &vmovdqu(@X[-4&7],"0($inp)"); # load input + &vmovdqu(@X[-3&7],"16($inp)"); + &vmovdqu(@X[-2&7],"32($inp)"); + &vmovdqu(@X[-1&7],"48($inp)"); + &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap + &add ($inp,64); + + $Xi=0; +} + +sub Xloop_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +$code.=<<___; +.align 16 +.Loop_avx: +___ + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_32_79(\&body_00_19); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_20_39); + &Xuplast_avx_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + $saved_r=$r; @saved_rndkey=@rndkey; + + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + +$code.=<<___; + vmovups $iv,48($out,$in0) # write output + lea 64($in0),$in0 + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic seed + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + jmp .Loop_avx + +.align 16 +.Ldone_avx: +___ + $jj=$j=$saved_j; @V=@saved_V; + $r=$saved_r; @rndkey=@saved_rndkey; + + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + +$code.=<<___; + vmovups $iv,48($out,$in0) # write output + mov 88(%rsp),$ivp # restore $ivp + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + vmovups $iv,($ivp) # write IV + vzeroall +___ +$code.=<<___ if ($win64); + movaps 96+0(%rsp),%xmm6 + movaps 96+16(%rsp),%xmm7 + movaps 96+32(%rsp),%xmm8 + movaps 96+48(%rsp),%xmm9 + movaps 96+64(%rsp),%xmm10 + movaps 96+80(%rsp),%xmm11 + movaps 96+96(%rsp),%xmm12 + movaps 96+112(%rsp),%xmm13 + movaps 96+128(%rsp),%xmm14 + movaps 96+144(%rsp),%xmm15 +___ +$code.=<<___; + lea `104+($win64?10*16:0)`(%rsp),%rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_avx: + ret +.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx +___ +} +$code.=<<___; +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask + +.asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by " +.align 64 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type ssse3_handler,\@abi-omnipotent +.align 16 +ssse3_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + lea 96(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + lea `104+10*16`(%rax),%rax # adjust stack pointer + + mov 0(%rax),%r15 + mov 8(%rax),%r14 + mov 16(%rax),%r13 + mov 24(%rax),%r12 + mov 32(%rax),%rbp + mov 40(%rax),%rbx + lea 48(%rax),%rax + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size ssse3_handler,.-ssse3_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3 + .rva .LSEH_end_aesni_cbc_sha1_enc_ssse3 + .rva .LSEH_info_aesni_cbc_sha1_enc_ssse3 +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_aesni_cbc_sha1_enc_avx + .rva .LSEH_end_aesni_cbc_sha1_enc_avx + .rva .LSEH_info_aesni_cbc_sha1_enc_avx +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_aesni_cbc_sha1_enc_ssse3: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_aesni_cbc_sha1_enc_avx: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] +___ +} + +#################################################################### +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + push @opcode,$rex|0x40 if($rex); +} + +sub aesni { + my $line=shift; + my @opcode=(0x66); + + if ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my %opcodelet = ( + "aesenc" => 0xdc, "aesenclast" => 0xdd + ); + return undef if (!defined($opcodelet{$1})); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x38,$opcodelet{$1}; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } + return $line; +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; + +print $code; +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/aesni-x86.S b/app/openssl/crypto/aes/asm/aesni-x86.S new file mode 100644 index 00000000..0766bb54 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-x86.S @@ -0,0 +1,2143 @@ +.file "crypto/aes/asm/aesni-x86.s" +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L002enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 + movups (%edx),%xmm0 + jnz .L002enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L003dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 + movups (%edx),%xmm0 + jnz .L003dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L004enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%edx),%xmm0 + jnz .L004enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L005dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%edx),%xmm0 + jnz .L005dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + jmp .L_aesni_encrypt6_enter +.align 16 +.L006enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.align 16 +.L_aesni_encrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%edx),%xmm0 + jnz .L006enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + jmp .L_aesni_decrypt6_enter +.align 16 +.L007dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.align 16 +.L_aesni_decrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%edx),%xmm0 + jnz .L007dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L008ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L009ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L010ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L011ecb_enc_loop6_enter +.align 16 +.L012ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L011ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L012ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L010ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L013ecb_enc_one + movups 16(%esi),%xmm3 + je .L014ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L015ecb_enc_three + movups 48(%esi),%xmm5 + je .L016ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L013ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L017enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L017enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L014ecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L015ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L016ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L008ecb_ret +.align 16 +.L009ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L018ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L019ecb_dec_loop6_enter +.align 16 +.L020ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L019ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L020ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L018ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L021ecb_dec_one + movups 16(%esi),%xmm3 + je .L022ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L023ecb_dec_three + movups 48(%esi),%xmm5 + je .L024ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L021ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L025dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L025dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L022ecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L023ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L024ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L008ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shrl $1,%ecx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %ecx,%ebx +.byte 102,15,56,0,253 +.L026ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + leal 32(%ebp),%edx + xorps %xmm0,%xmm3 + movups (%edx),%xmm0 +.L027ccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L027ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + decl %eax + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + jnz .L026ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L028enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L028enc1_loop_5 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + jmp .L029ccm64_dec_outer +.align 16 +.L029ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movl %ebx,%ecx + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L030ccm64_dec_break + movups (%ebp),%xmm0 + shrl $1,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%ebp),%edx + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups (%edx),%xmm0 +.L031ccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L031ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leal 16(%esi),%esi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp .L029ccm64_dec_outer +.align 16 +.L030ccm64_dec_break: + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L032enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L033ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm0 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,203,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,197,0 + incl %ebx +.byte 102,15,58,34,203,1 + incl %ebp +.byte 102,15,58,34,197,1 + incl %ebx +.byte 102,15,58,34,203,2 + incl %ebp +.byte 102,15,58,34,197,2 + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + pshufd $192,%xmm1,%xmm2 + pshufd $128,%xmm1,%xmm3 + cmpl $6,%eax + jb .L034ctr32_tail + movdqa %xmm7,32(%esp) + shrl $1,%ecx + movl %edx,%ebp + movl %ecx,%ebx + subl $6,%eax + jmp .L035ctr32_loop6 +.align 16 +.L035ctr32_loop6: + pshufd $64,%xmm1,%xmm4 + movdqa 32(%esp),%xmm1 + pshufd $192,%xmm0,%xmm5 + por %xmm1,%xmm2 + pshufd $128,%xmm0,%xmm6 + por %xmm1,%xmm3 + pshufd $64,%xmm0,%xmm7 + por %xmm1,%xmm4 + por %xmm1,%xmm5 + por %xmm1,%xmm6 + por %xmm1,%xmm7 + movups (%ebp),%xmm0 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + decl %ecx + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 48(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 64(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + movups %xmm6,64(%edi) + pshufd $192,%xmm1,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movl %ebx,%ecx + pshufd $128,%xmm1,%xmm3 + subl $6,%eax + jnc .L035ctr32_loop6 + addl $6,%eax + jz .L036ctr32_ret + movl %ebp,%edx + leal 1(,%ecx,2),%ecx + movdqa 32(%esp),%xmm7 +.L034ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L037ctr32_one + pshufd $64,%xmm1,%xmm4 + por %xmm7,%xmm3 + je .L038ctr32_two + pshufd $192,%xmm0,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L039ctr32_three + pshufd $128,%xmm0,%xmm6 + por %xmm7,%xmm5 + je .L040ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L036ctr32_ret +.align 16 +.L033ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L037ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L041enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L041enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L036ctr32_ret +.align 16 +.L038ctr32_two: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L036ctr32_ret +.align 16 +.L039ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L036ctr32_ret +.align 16 +.L040ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L036ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L042enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L042enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L043xts_enc_short + shrl $1,%ecx + movl %ecx,%ebx + jmp .L044xts_enc_loop6 +.align 16 +.L044xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,220,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L044xts_enc_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L043xts_enc_short: + addl $96,%eax + jz .L045xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L046xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L047xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L048xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L049xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L050xts_enc_done +.align 16 +.L046xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L051enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L051enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L047xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L048xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L049xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L045xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L052xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L053xts_enc_steal +.align 16 +.L050xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L052xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L053xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L053xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L054enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L054enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L052xts_enc_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: +.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L055enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L055enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L056xts_dec_short + shrl $1,%ecx + movl %ecx,%ebx + jmp .L057xts_dec_loop6 +.align 16 +.L057xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,222,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L057xts_dec_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L056xts_dec_short: + addl $96,%eax + jz .L058xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L059xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L060xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L061xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L062xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L063xts_dec_done +.align 16 +.L059xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L064dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L064dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L060xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L061xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L062xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L058xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L065xts_dec_ret + movl %eax,112(%esp) + jmp .L066xts_dec_only_one_more +.align 16 +.L063xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L065xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L066xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L067dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L067dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L068xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L068xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L069dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L069dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L065xts_dec_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L070cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L071cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L072cbc_enc_tail + subl $16,%eax + jmp .L073cbc_enc_loop +.align 16 +.L073cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L074enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L074enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L073cbc_enc_loop + addl $16,%eax + jnz .L072cbc_enc_tail + movaps %xmm2,%xmm7 + jmp .L075cbc_ret +.L072cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L073cbc_enc_loop +.align 16 +.L071cbc_decrypt: + cmpl $80,%eax + jbe .L076cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L077cbc_dec_loop6_enter +.align 16 +.L078cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L077cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L078cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L079cbc_dec_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L076cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L080cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L081cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L082cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L083cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + subl $80,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L080cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L084dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L084dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L081cbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L082cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L083cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + subl $64,%eax +.L079cbc_dec_tail_collected: + andl $15,%eax + jnz .L085cbc_dec_tail_partial + movups %xmm2,(%edi) + jmp .L075cbc_ret +.align 16 +.L085cbc_dec_tail_partial: + movaps %xmm2,(%esp) + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 +.L075cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + movups %xmm7,(%ebp) +.L070cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + testl %eax,%eax + jz .L086bad_pointer + testl %edx,%edx + jz .L086bad_pointer + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + leal 16(%edx),%edx + cmpl $256,%ecx + je .L08714rounds + cmpl $192,%ecx + je .L08812rounds + cmpl $128,%ecx + jne .L089bad_keybits +.align 16 +.L09010rounds: + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L091key_128_cold +.byte 102,15,58,223,200,2 + call .L092key_128 +.byte 102,15,58,223,200,4 + call .L092key_128 +.byte 102,15,58,223,200,8 + call .L092key_128 +.byte 102,15,58,223,200,16 + call .L092key_128 +.byte 102,15,58,223,200,32 + call .L092key_128 +.byte 102,15,58,223,200,64 + call .L092key_128 +.byte 102,15,58,223,200,128 + call .L092key_128 +.byte 102,15,58,223,200,27 + call .L092key_128 +.byte 102,15,58,223,200,54 + call .L092key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + xorl %eax,%eax + ret +.align 16 +.L092key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L091key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L08812rounds: + movq 16(%eax),%xmm2 + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L093key_192a_cold +.byte 102,15,58,223,202,2 + call .L094key_192b +.byte 102,15,58,223,202,4 + call .L095key_192a +.byte 102,15,58,223,202,8 + call .L094key_192b +.byte 102,15,58,223,202,16 + call .L095key_192a +.byte 102,15,58,223,202,32 + call .L094key_192b +.byte 102,15,58,223,202,64 + call .L095key_192a +.byte 102,15,58,223,202,128 + call .L094key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + xorl %eax,%eax + ret +.align 16 +.L095key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L093key_192a_cold: + movaps %xmm2,%xmm5 +.L096key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L094key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L096key_192b_warm +.align 16 +.L08714rounds: + movups 16(%eax),%xmm2 + movl $13,%ecx + leal 16(%edx),%edx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L097key_256a_cold +.byte 102,15,58,223,200,1 + call .L098key_256b +.byte 102,15,58,223,202,2 + call .L099key_256a +.byte 102,15,58,223,200,2 + call .L098key_256b +.byte 102,15,58,223,202,4 + call .L099key_256a +.byte 102,15,58,223,200,4 + call .L098key_256b +.byte 102,15,58,223,202,8 + call .L099key_256a +.byte 102,15,58,223,200,8 + call .L098key_256b +.byte 102,15,58,223,202,16 + call .L099key_256a +.byte 102,15,58,223,200,16 + call .L098key_256b +.byte 102,15,58,223,202,32 + call .L099key_256a +.byte 102,15,58,223,200,32 + call .L098key_256b +.byte 102,15,58,223,202,64 + call .L099key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + ret +.align 16 +.L099key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L097key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L098key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 4 +.L086bad_pointer: + movl $-1,%eax + ret +.align 4 +.L089bad_keybits: + movl $-2,%eax + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L100dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L101dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L101dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + xorl %eax,%eax +.L100dec_key_ret: + ret +.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/app/openssl/crypto/aes/asm/aesni-x86.pl b/app/openssl/crypto/aes/asm/aesni-x86.pl new file mode 100644 index 00000000..3dc345b5 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-x86.pl @@ -0,0 +1,2189 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for Intel AES-NI extension. In +# OpenSSL context it's used with Intel engine, but can also be used as +# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for +# details]. +# +# Performance. +# +# To start with see corresponding paragraph in aesni-x86_64.pl... +# Instead of filling table similar to one found there I've chosen to +# summarize *comparison* results for raw ECB, CTR and CBC benchmarks. +# The simplified table below represents 32-bit performance relative +# to 64-bit one in every given point. Ratios vary for different +# encryption modes, therefore interval values. +# +# 16-byte 64-byte 256-byte 1-KB 8-KB +# 53-67% 67-84% 91-94% 95-98% 97-99.5% +# +# Lower ratios for smaller block sizes are perfectly understandable, +# because function call overhead is higher in 32-bit mode. Largest +# 8-KB block performance is virtually same: 32-bit code is less than +# 1% slower for ECB, CBC and CCM, and ~3% slower otherwise. + +# January 2011 +# +# See aesni-x86_64.pl for details. Unlike x86_64 version this module +# interleaves at most 6 aes[enc|dec] instructions, because there are +# not enough registers for 8x interleave [which should be optimal for +# Sandy Bridge]. Actually, performance results for 6x interleave +# factor presented in aesni-x86_64.pl (except for CTR) are for this +# module. + +# April 2011 +# +# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing +# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09. + +$PREFIX="aesni"; # if $PREFIX is set to "AES", the script + # generates drop-in replacement for + # crypto/aes/asm/aes-586.pl:-) +$inline=1; # inline _aesni_[en|de]crypt + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],$0); + +if ($PREFIX eq "aesni") { $movekey=*movups; } +else { $movekey=*movups; } + +$len="eax"; +$rounds="ecx"; +$key="edx"; +$inp="esi"; +$out="edi"; +$rounds_="ebx"; # backup copy for $rounds +$key_="ebp"; # backup copy for $key + +$rndkey0="xmm0"; +$rndkey1="xmm1"; +$inout0="xmm2"; +$inout1="xmm3"; +$inout2="xmm4"; +$inout3="xmm5"; $in1="xmm5"; +$inout4="xmm6"; $in0="xmm6"; +$inout5="xmm7"; $ivec="xmm7"; + +# AESNI extenstion +sub aeskeygenassist +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); } +} +sub aescommon +{ my($opcodelet,$dst,$src)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);} +} +sub aesimc { aescommon(0xdb,@_); } +sub aesenc { aescommon(0xdc,@_); } +sub aesenclast { aescommon(0xdd,@_); } +sub aesdec { aescommon(0xde,@_); } +sub aesdeclast { aescommon(0xdf,@_); } + +# Inline version of internal aesni_[en|de]crypt1 +{ my $sn; +sub aesni_inline_generate1 +{ my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout)); + $sn++; + + &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey1,&QWP(16,$key)); + &xorps ($ivec,$rndkey0) if (defined($ivec)); + &lea ($key,&DWP(32,$key)); + &xorps ($inout,$ivec) if (defined($ivec)); + &xorps ($inout,$rndkey0) if (!defined($ivec)); + &set_label("${p}1_loop_$sn"); + eval"&aes${p} ($inout,$rndkey1)"; + &dec ($rounds); + &$movekey ($rndkey1,&QWP(0,$key)); + &lea ($key,&DWP(16,$key)); + &jnz (&label("${p}1_loop_$sn")); + eval"&aes${p}last ($inout,$rndkey1)"; +}} + +sub aesni_generate1 # fully unrolled loop +{ my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout)); + + &function_begin_B("_aesni_${p}rypt1"); + &movups ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey1,&QWP(0x10,$key)); + &xorps ($inout,$rndkey0); + &$movekey ($rndkey0,&QWP(0x20,$key)); + &lea ($key,&DWP(0x30,$key)); + &cmp ($rounds,11); + &jb (&label("${p}128")); + &lea ($key,&DWP(0x20,$key)); + &je (&label("${p}192")); + &lea ($key,&DWP(0x20,$key)); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(-0x40,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(-0x30,$key)); + &set_label("${p}192"); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(-0x20,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(-0x10,$key)); + &set_label("${p}128"); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(0,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0x10,$key)); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(0x20,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0x30,$key)); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(0x40,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0x50,$key)); + eval"&aes${p} ($inout,$rndkey1)"; + &$movekey ($rndkey1,&QWP(0x60,$key)); + eval"&aes${p} ($inout,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0x70,$key)); + eval"&aes${p} ($inout,$rndkey1)"; + eval"&aes${p}last ($inout,$rndkey0)"; + &ret(); + &function_end_B("_aesni_${p}rypt1"); +} + +# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); +&aesni_generate1("enc") if (!$inline); +&function_begin_B("${PREFIX}_encrypt"); + &mov ("eax",&wparam(0)); + &mov ($key,&wparam(2)); + &movups ($inout0,&QWP(0,"eax")); + &mov ($rounds,&DWP(240,$key)); + &mov ("eax",&wparam(1)); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &movups (&QWP(0,"eax"),$inout0); + &ret (); +&function_end_B("${PREFIX}_encrypt"); + +# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key); +&aesni_generate1("dec") if(!$inline); +&function_begin_B("${PREFIX}_decrypt"); + &mov ("eax",&wparam(0)); + &mov ($key,&wparam(2)); + &movups ($inout0,&QWP(0,"eax")); + &mov ($rounds,&DWP(240,$key)); + &mov ("eax",&wparam(1)); + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &movups (&QWP(0,"eax"),$inout0); + &ret (); +&function_end_B("${PREFIX}_decrypt"); + +# _aesni_[en|de]cryptN are private interfaces, N denotes interleave +# factor. Why 3x subroutine were originally used in loops? Even though +# aes[enc|dec] latency was originally 6, it could be scheduled only +# every *2nd* cycle. Thus 3x interleave was the one providing optimal +# utilization, i.e. when subroutine's throughput is virtually same as +# of non-interleaved subroutine [for number of input blocks up to 3]. +# This is why it makes no sense to implement 2x subroutine. +# aes[enc|dec] latency in next processor generation is 8, but the +# instructions can be scheduled every cycle. Optimal interleave for +# new processor is therefore 8x, but it's unfeasible to accommodate it +# in XMM registers addreassable in 32-bit mode and therefore 6x is +# used instead... + +sub aesni_generate3 +{ my $p=shift; + + &function_begin_B("_aesni_${p}rypt3"); + &$movekey ($rndkey0,&QWP(0,$key)); + &shr ($rounds,1); + &$movekey ($rndkey1,&QWP(16,$key)); + &lea ($key,&DWP(32,$key)); + &xorps ($inout0,$rndkey0); + &pxor ($inout1,$rndkey0); + &pxor ($inout2,$rndkey0); + &$movekey ($rndkey0,&QWP(0,$key)); + + &set_label("${p}3_loop"); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + &dec ($rounds); + eval"&aes${p} ($inout2,$rndkey1)"; + &$movekey ($rndkey1,&QWP(16,$key)); + eval"&aes${p} ($inout0,$rndkey0)"; + eval"&aes${p} ($inout1,$rndkey0)"; + &lea ($key,&DWP(32,$key)); + eval"&aes${p} ($inout2,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0,$key)); + &jnz (&label("${p}3_loop")); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + eval"&aes${p} ($inout2,$rndkey1)"; + eval"&aes${p}last ($inout0,$rndkey0)"; + eval"&aes${p}last ($inout1,$rndkey0)"; + eval"&aes${p}last ($inout2,$rndkey0)"; + &ret(); + &function_end_B("_aesni_${p}rypt3"); +} + +# 4x interleave is implemented to improve small block performance, +# most notably [and naturally] 4 block by ~30%. One can argue that one +# should have implemented 5x as well, but improvement would be <20%, +# so it's not worth it... +sub aesni_generate4 +{ my $p=shift; + + &function_begin_B("_aesni_${p}rypt4"); + &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey1,&QWP(16,$key)); + &shr ($rounds,1); + &lea ($key,&DWP(32,$key)); + &xorps ($inout0,$rndkey0); + &pxor ($inout1,$rndkey0); + &pxor ($inout2,$rndkey0); + &pxor ($inout3,$rndkey0); + &$movekey ($rndkey0,&QWP(0,$key)); + + &set_label("${p}4_loop"); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + &dec ($rounds); + eval"&aes${p} ($inout2,$rndkey1)"; + eval"&aes${p} ($inout3,$rndkey1)"; + &$movekey ($rndkey1,&QWP(16,$key)); + eval"&aes${p} ($inout0,$rndkey0)"; + eval"&aes${p} ($inout1,$rndkey0)"; + &lea ($key,&DWP(32,$key)); + eval"&aes${p} ($inout2,$rndkey0)"; + eval"&aes${p} ($inout3,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0,$key)); + &jnz (&label("${p}4_loop")); + + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + eval"&aes${p} ($inout2,$rndkey1)"; + eval"&aes${p} ($inout3,$rndkey1)"; + eval"&aes${p}last ($inout0,$rndkey0)"; + eval"&aes${p}last ($inout1,$rndkey0)"; + eval"&aes${p}last ($inout2,$rndkey0)"; + eval"&aes${p}last ($inout3,$rndkey0)"; + &ret(); + &function_end_B("_aesni_${p}rypt4"); +} + +sub aesni_generate6 +{ my $p=shift; + + &function_begin_B("_aesni_${p}rypt6"); + &static_label("_aesni_${p}rypt6_enter"); + &$movekey ($rndkey0,&QWP(0,$key)); + &shr ($rounds,1); + &$movekey ($rndkey1,&QWP(16,$key)); + &lea ($key,&DWP(32,$key)); + &xorps ($inout0,$rndkey0); + &pxor ($inout1,$rndkey0); # pxor does better here + eval"&aes${p} ($inout0,$rndkey1)"; + &pxor ($inout2,$rndkey0); + eval"&aes${p} ($inout1,$rndkey1)"; + &pxor ($inout3,$rndkey0); + &dec ($rounds); + eval"&aes${p} ($inout2,$rndkey1)"; + &pxor ($inout4,$rndkey0); + eval"&aes${p} ($inout3,$rndkey1)"; + &pxor ($inout5,$rndkey0); + eval"&aes${p} ($inout4,$rndkey1)"; + &$movekey ($rndkey0,&QWP(0,$key)); + eval"&aes${p} ($inout5,$rndkey1)"; + &jmp (&label("_aesni_${p}rypt6_enter")); + + &set_label("${p}6_loop",16); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + &dec ($rounds); + eval"&aes${p} ($inout2,$rndkey1)"; + eval"&aes${p} ($inout3,$rndkey1)"; + eval"&aes${p} ($inout4,$rndkey1)"; + eval"&aes${p} ($inout5,$rndkey1)"; + &set_label("_aesni_${p}rypt6_enter",16); + &$movekey ($rndkey1,&QWP(16,$key)); + eval"&aes${p} ($inout0,$rndkey0)"; + eval"&aes${p} ($inout1,$rndkey0)"; + &lea ($key,&DWP(32,$key)); + eval"&aes${p} ($inout2,$rndkey0)"; + eval"&aes${p} ($inout3,$rndkey0)"; + eval"&aes${p} ($inout4,$rndkey0)"; + eval"&aes${p} ($inout5,$rndkey0)"; + &$movekey ($rndkey0,&QWP(0,$key)); + &jnz (&label("${p}6_loop")); + + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + eval"&aes${p} ($inout2,$rndkey1)"; + eval"&aes${p} ($inout3,$rndkey1)"; + eval"&aes${p} ($inout4,$rndkey1)"; + eval"&aes${p} ($inout5,$rndkey1)"; + eval"&aes${p}last ($inout0,$rndkey0)"; + eval"&aes${p}last ($inout1,$rndkey0)"; + eval"&aes${p}last ($inout2,$rndkey0)"; + eval"&aes${p}last ($inout3,$rndkey0)"; + eval"&aes${p}last ($inout4,$rndkey0)"; + eval"&aes${p}last ($inout5,$rndkey0)"; + &ret(); + &function_end_B("_aesni_${p}rypt6"); +} +&aesni_generate3("enc") if ($PREFIX eq "aesni"); +&aesni_generate3("dec"); +&aesni_generate4("enc") if ($PREFIX eq "aesni"); +&aesni_generate4("dec"); +&aesni_generate6("enc") if ($PREFIX eq "aesni"); +&aesni_generate6("dec"); + +if ($PREFIX eq "aesni") { +###################################################################### +# void aesni_ecb_encrypt (const void *in, void *out, +# size_t length, const AES_KEY *key, +# int enc); +&function_begin("aesni_ecb_encrypt"); + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); + &mov ($rounds_,&wparam(4)); + &and ($len,-16); + &jz (&label("ecb_ret")); + &mov ($rounds,&DWP(240,$key)); + &test ($rounds_,$rounds_); + &jz (&label("ecb_decrypt")); + + &mov ($key_,$key); # backup $key + &mov ($rounds_,$rounds); # backup $rounds + &cmp ($len,0x60); + &jb (&label("ecb_enc_tail")); + + &movdqu ($inout0,&QWP(0,$inp)); + &movdqu ($inout1,&QWP(0x10,$inp)); + &movdqu ($inout2,&QWP(0x20,$inp)); + &movdqu ($inout3,&QWP(0x30,$inp)); + &movdqu ($inout4,&QWP(0x40,$inp)); + &movdqu ($inout5,&QWP(0x50,$inp)); + &lea ($inp,&DWP(0x60,$inp)); + &sub ($len,0x60); + &jmp (&label("ecb_enc_loop6_enter")); + +&set_label("ecb_enc_loop6",16); + &movups (&QWP(0,$out),$inout0); + &movdqu ($inout0,&QWP(0,$inp)); + &movups (&QWP(0x10,$out),$inout1); + &movdqu ($inout1,&QWP(0x10,$inp)); + &movups (&QWP(0x20,$out),$inout2); + &movdqu ($inout2,&QWP(0x20,$inp)); + &movups (&QWP(0x30,$out),$inout3); + &movdqu ($inout3,&QWP(0x30,$inp)); + &movups (&QWP(0x40,$out),$inout4); + &movdqu ($inout4,&QWP(0x40,$inp)); + &movups (&QWP(0x50,$out),$inout5); + &lea ($out,&DWP(0x60,$out)); + &movdqu ($inout5,&QWP(0x50,$inp)); + &lea ($inp,&DWP(0x60,$inp)); +&set_label("ecb_enc_loop6_enter"); + + &call ("_aesni_encrypt6"); + + &mov ($key,$key_); # restore $key + &mov ($rounds,$rounds_); # restore $rounds + &sub ($len,0x60); + &jnc (&label("ecb_enc_loop6")); + + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &movups (&QWP(0x40,$out),$inout4); + &movups (&QWP(0x50,$out),$inout5); + &lea ($out,&DWP(0x60,$out)); + &add ($len,0x60); + &jz (&label("ecb_ret")); + +&set_label("ecb_enc_tail"); + &movups ($inout0,&QWP(0,$inp)); + &cmp ($len,0x20); + &jb (&label("ecb_enc_one")); + &movups ($inout1,&QWP(0x10,$inp)); + &je (&label("ecb_enc_two")); + &movups ($inout2,&QWP(0x20,$inp)); + &cmp ($len,0x40); + &jb (&label("ecb_enc_three")); + &movups ($inout3,&QWP(0x30,$inp)); + &je (&label("ecb_enc_four")); + &movups ($inout4,&QWP(0x40,$inp)); + &xorps ($inout5,$inout5); + &call ("_aesni_encrypt6"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &movups (&QWP(0x40,$out),$inout4); + jmp (&label("ecb_ret")); + +&set_label("ecb_enc_one",16); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &movups (&QWP(0,$out),$inout0); + &jmp (&label("ecb_ret")); + +&set_label("ecb_enc_two",16); + &xorps ($inout2,$inout2); + &call ("_aesni_encrypt3"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &jmp (&label("ecb_ret")); + +&set_label("ecb_enc_three",16); + &call ("_aesni_encrypt3"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &jmp (&label("ecb_ret")); + +&set_label("ecb_enc_four",16); + &call ("_aesni_encrypt4"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &jmp (&label("ecb_ret")); +###################################################################### +&set_label("ecb_decrypt",16); + &mov ($key_,$key); # backup $key + &mov ($rounds_,$rounds); # backup $rounds + &cmp ($len,0x60); + &jb (&label("ecb_dec_tail")); + + &movdqu ($inout0,&QWP(0,$inp)); + &movdqu ($inout1,&QWP(0x10,$inp)); + &movdqu ($inout2,&QWP(0x20,$inp)); + &movdqu ($inout3,&QWP(0x30,$inp)); + &movdqu ($inout4,&QWP(0x40,$inp)); + &movdqu ($inout5,&QWP(0x50,$inp)); + &lea ($inp,&DWP(0x60,$inp)); + &sub ($len,0x60); + &jmp (&label("ecb_dec_loop6_enter")); + +&set_label("ecb_dec_loop6",16); + &movups (&QWP(0,$out),$inout0); + &movdqu ($inout0,&QWP(0,$inp)); + &movups (&QWP(0x10,$out),$inout1); + &movdqu ($inout1,&QWP(0x10,$inp)); + &movups (&QWP(0x20,$out),$inout2); + &movdqu ($inout2,&QWP(0x20,$inp)); + &movups (&QWP(0x30,$out),$inout3); + &movdqu ($inout3,&QWP(0x30,$inp)); + &movups (&QWP(0x40,$out),$inout4); + &movdqu ($inout4,&QWP(0x40,$inp)); + &movups (&QWP(0x50,$out),$inout5); + &lea ($out,&DWP(0x60,$out)); + &movdqu ($inout5,&QWP(0x50,$inp)); + &lea ($inp,&DWP(0x60,$inp)); +&set_label("ecb_dec_loop6_enter"); + + &call ("_aesni_decrypt6"); + + &mov ($key,$key_); # restore $key + &mov ($rounds,$rounds_); # restore $rounds + &sub ($len,0x60); + &jnc (&label("ecb_dec_loop6")); + + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &movups (&QWP(0x40,$out),$inout4); + &movups (&QWP(0x50,$out),$inout5); + &lea ($out,&DWP(0x60,$out)); + &add ($len,0x60); + &jz (&label("ecb_ret")); + +&set_label("ecb_dec_tail"); + &movups ($inout0,&QWP(0,$inp)); + &cmp ($len,0x20); + &jb (&label("ecb_dec_one")); + &movups ($inout1,&QWP(0x10,$inp)); + &je (&label("ecb_dec_two")); + &movups ($inout2,&QWP(0x20,$inp)); + &cmp ($len,0x40); + &jb (&label("ecb_dec_three")); + &movups ($inout3,&QWP(0x30,$inp)); + &je (&label("ecb_dec_four")); + &movups ($inout4,&QWP(0x40,$inp)); + &xorps ($inout5,$inout5); + &call ("_aesni_decrypt6"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &movups (&QWP(0x40,$out),$inout4); + &jmp (&label("ecb_ret")); + +&set_label("ecb_dec_one",16); + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &movups (&QWP(0,$out),$inout0); + &jmp (&label("ecb_ret")); + +&set_label("ecb_dec_two",16); + &xorps ($inout2,$inout2); + &call ("_aesni_decrypt3"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &jmp (&label("ecb_ret")); + +&set_label("ecb_dec_three",16); + &call ("_aesni_decrypt3"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &jmp (&label("ecb_ret")); + +&set_label("ecb_dec_four",16); + &call ("_aesni_decrypt4"); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + +&set_label("ecb_ret"); +&function_end("aesni_ecb_encrypt"); + +###################################################################### +# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, +# size_t blocks, const AES_KEY *key, +# const char *ivec,char *cmac); +# +# Handles only complete blocks, operates on 64-bit counter and +# does not update *ivec! Nor does it finalize CMAC value +# (see engine/eng_aesni.c for details) +# +{ my $cmac=$inout1; +&function_begin("aesni_ccm64_encrypt_blocks"); + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); + &mov ($rounds_,&wparam(4)); + &mov ($rounds,&wparam(5)); + &mov ($key_,"esp"); + &sub ("esp",60); + &and ("esp",-16); # align stack + &mov (&DWP(48,"esp"),$key_); + + &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec + &movdqu ($cmac,&QWP(0,$rounds)); # load cmac + &mov ($rounds,&DWP(240,$key)); + + # compose byte-swap control mask for pshufb on stack + &mov (&DWP(0,"esp"),0x0c0d0e0f); + &mov (&DWP(4,"esp"),0x08090a0b); + &mov (&DWP(8,"esp"),0x04050607); + &mov (&DWP(12,"esp"),0x00010203); + + # compose counter increment vector on stack + &mov ($rounds_,1); + &xor ($key_,$key_); + &mov (&DWP(16,"esp"),$rounds_); + &mov (&DWP(20,"esp"),$key_); + &mov (&DWP(24,"esp"),$key_); + &mov (&DWP(28,"esp"),$key_); + + &shr ($rounds,1); + &lea ($key_,&DWP(0,$key)); + &movdqa ($inout3,&QWP(0,"esp")); + &movdqa ($inout0,$ivec); + &mov ($rounds_,$rounds); + &pshufb ($ivec,$inout3); + +&set_label("ccm64_enc_outer"); + &$movekey ($rndkey0,&QWP(0,$key_)); + &mov ($rounds,$rounds_); + &movups ($in0,&QWP(0,$inp)); + + &xorps ($inout0,$rndkey0); + &$movekey ($rndkey1,&QWP(16,$key_)); + &xorps ($rndkey0,$in0); + &lea ($key,&DWP(32,$key_)); + &xorps ($cmac,$rndkey0); # cmac^=inp + &$movekey ($rndkey0,&QWP(0,$key)); + +&set_label("ccm64_enc2_loop"); + &aesenc ($inout0,$rndkey1); + &dec ($rounds); + &aesenc ($cmac,$rndkey1); + &$movekey ($rndkey1,&QWP(16,$key)); + &aesenc ($inout0,$rndkey0); + &lea ($key,&DWP(32,$key)); + &aesenc ($cmac,$rndkey0); + &$movekey ($rndkey0,&QWP(0,$key)); + &jnz (&label("ccm64_enc2_loop")); + &aesenc ($inout0,$rndkey1); + &aesenc ($cmac,$rndkey1); + &paddq ($ivec,&QWP(16,"esp")); + &aesenclast ($inout0,$rndkey0); + &aesenclast ($cmac,$rndkey0); + + &dec ($len); + &lea ($inp,&DWP(16,$inp)); + &xorps ($in0,$inout0); # inp^=E(ivec) + &movdqa ($inout0,$ivec); + &movups (&QWP(0,$out),$in0); # save output + &lea ($out,&DWP(16,$out)); + &pshufb ($inout0,$inout3); + &jnz (&label("ccm64_enc_outer")); + + &mov ("esp",&DWP(48,"esp")); + &mov ($out,&wparam(5)); + &movups (&QWP(0,$out),$cmac); +&function_end("aesni_ccm64_encrypt_blocks"); + +&function_begin("aesni_ccm64_decrypt_blocks"); + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); + &mov ($rounds_,&wparam(4)); + &mov ($rounds,&wparam(5)); + &mov ($key_,"esp"); + &sub ("esp",60); + &and ("esp",-16); # align stack + &mov (&DWP(48,"esp"),$key_); + + &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec + &movdqu ($cmac,&QWP(0,$rounds)); # load cmac + &mov ($rounds,&DWP(240,$key)); + + # compose byte-swap control mask for pshufb on stack + &mov (&DWP(0,"esp"),0x0c0d0e0f); + &mov (&DWP(4,"esp"),0x08090a0b); + &mov (&DWP(8,"esp"),0x04050607); + &mov (&DWP(12,"esp"),0x00010203); + + # compose counter increment vector on stack + &mov ($rounds_,1); + &xor ($key_,$key_); + &mov (&DWP(16,"esp"),$rounds_); + &mov (&DWP(20,"esp"),$key_); + &mov (&DWP(24,"esp"),$key_); + &mov (&DWP(28,"esp"),$key_); + + &movdqa ($inout3,&QWP(0,"esp")); # bswap mask + &movdqa ($inout0,$ivec); + + &mov ($key_,$key); + &mov ($rounds_,$rounds); + + &pshufb ($ivec,$inout3); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &movups ($in0,&QWP(0,$inp)); # load inp + &paddq ($ivec,&QWP(16,"esp")); + &lea ($inp,&QWP(16,$inp)); + &jmp (&label("ccm64_dec_outer")); + +&set_label("ccm64_dec_outer",16); + &xorps ($in0,$inout0); # inp ^= E(ivec) + &movdqa ($inout0,$ivec); + &mov ($rounds,$rounds_); + &movups (&QWP(0,$out),$in0); # save output + &lea ($out,&DWP(16,$out)); + &pshufb ($inout0,$inout3); + + &sub ($len,1); + &jz (&label("ccm64_dec_break")); + + &$movekey ($rndkey0,&QWP(0,$key_)); + &shr ($rounds,1); + &$movekey ($rndkey1,&QWP(16,$key_)); + &xorps ($in0,$rndkey0); + &lea ($key,&DWP(32,$key_)); + &xorps ($inout0,$rndkey0); + &xorps ($cmac,$in0); # cmac^=out + &$movekey ($rndkey0,&QWP(0,$key)); + +&set_label("ccm64_dec2_loop"); + &aesenc ($inout0,$rndkey1); + &dec ($rounds); + &aesenc ($cmac,$rndkey1); + &$movekey ($rndkey1,&QWP(16,$key)); + &aesenc ($inout0,$rndkey0); + &lea ($key,&DWP(32,$key)); + &aesenc ($cmac,$rndkey0); + &$movekey ($rndkey0,&QWP(0,$key)); + &jnz (&label("ccm64_dec2_loop")); + &movups ($in0,&QWP(0,$inp)); # load inp + &paddq ($ivec,&QWP(16,"esp")); + &aesenc ($inout0,$rndkey1); + &aesenc ($cmac,$rndkey1); + &lea ($inp,&QWP(16,$inp)); + &aesenclast ($inout0,$rndkey0); + &aesenclast ($cmac,$rndkey0); + &jmp (&label("ccm64_dec_outer")); + +&set_label("ccm64_dec_break",16); + &mov ($key,$key_); + if ($inline) + { &aesni_inline_generate1("enc",$cmac,$in0); } + else + { &call ("_aesni_encrypt1",$cmac); } + + &mov ("esp",&DWP(48,"esp")); + &mov ($out,&wparam(5)); + &movups (&QWP(0,$out),$cmac); +&function_end("aesni_ccm64_decrypt_blocks"); +} + +###################################################################### +# void aesni_ctr32_encrypt_blocks (const void *in, void *out, +# size_t blocks, const AES_KEY *key, +# const char *ivec); +# +# Handles only complete blocks, operates on 32-bit counter and +# does not update *ivec! (see engine/eng_aesni.c for details) +# +# stack layout: +# 0 pshufb mask +# 16 vector addend: 0,6,6,6 +# 32 counter-less ivec +# 48 1st triplet of counter vector +# 64 2nd triplet of counter vector +# 80 saved %esp + +&function_begin("aesni_ctr32_encrypt_blocks"); + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); + &mov ($rounds_,&wparam(4)); + &mov ($key_,"esp"); + &sub ("esp",88); + &and ("esp",-16); # align stack + &mov (&DWP(80,"esp"),$key_); + + &cmp ($len,1); + &je (&label("ctr32_one_shortcut")); + + &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec + + # compose byte-swap control mask for pshufb on stack + &mov (&DWP(0,"esp"),0x0c0d0e0f); + &mov (&DWP(4,"esp"),0x08090a0b); + &mov (&DWP(8,"esp"),0x04050607); + &mov (&DWP(12,"esp"),0x00010203); + + # compose counter increment vector on stack + &mov ($rounds,6); + &xor ($key_,$key_); + &mov (&DWP(16,"esp"),$rounds); + &mov (&DWP(20,"esp"),$rounds); + &mov (&DWP(24,"esp"),$rounds); + &mov (&DWP(28,"esp"),$key_); + + &pextrd ($rounds_,$inout5,3); # pull 32-bit counter + &pinsrd ($inout5,$key_,3); # wipe 32-bit counter + + &mov ($rounds,&DWP(240,$key)); # key->rounds + + # compose 2 vectors of 3x32-bit counters + &bswap ($rounds_); + &pxor ($rndkey1,$rndkey1); + &pxor ($rndkey0,$rndkey0); + &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask + &pinsrd ($rndkey1,$rounds_,0); + &lea ($key_,&DWP(3,$rounds_)); + &pinsrd ($rndkey0,$key_,0); + &inc ($rounds_); + &pinsrd ($rndkey1,$rounds_,1); + &inc ($key_); + &pinsrd ($rndkey0,$key_,1); + &inc ($rounds_); + &pinsrd ($rndkey1,$rounds_,2); + &inc ($key_); + &pinsrd ($rndkey0,$key_,2); + &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet + &pshufb ($rndkey1,$inout0); # byte swap + &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet + &pshufb ($rndkey0,$inout0); # byte swap + + &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword + &pshufd ($inout1,$rndkey1,2<<6); + &cmp ($len,6); + &jb (&label("ctr32_tail")); + &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec + &shr ($rounds,1); + &mov ($key_,$key); # backup $key + &mov ($rounds_,$rounds); # backup $rounds + &sub ($len,6); + &jmp (&label("ctr32_loop6")); + +&set_label("ctr32_loop6",16); + &pshufd ($inout2,$rndkey1,1<<6); + &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec + &pshufd ($inout3,$rndkey0,3<<6); + &por ($inout0,$rndkey1); # merge counter-less ivec + &pshufd ($inout4,$rndkey0,2<<6); + &por ($inout1,$rndkey1); + &pshufd ($inout5,$rndkey0,1<<6); + &por ($inout2,$rndkey1); + &por ($inout3,$rndkey1); + &por ($inout4,$rndkey1); + &por ($inout5,$rndkey1); + + # inlining _aesni_encrypt6's prologue gives ~4% improvement... + &$movekey ($rndkey0,&QWP(0,$key_)); + &$movekey ($rndkey1,&QWP(16,$key_)); + &lea ($key,&DWP(32,$key_)); + &dec ($rounds); + &pxor ($inout0,$rndkey0); + &pxor ($inout1,$rndkey0); + &aesenc ($inout0,$rndkey1); + &pxor ($inout2,$rndkey0); + &aesenc ($inout1,$rndkey1); + &pxor ($inout3,$rndkey0); + &aesenc ($inout2,$rndkey1); + &pxor ($inout4,$rndkey0); + &aesenc ($inout3,$rndkey1); + &pxor ($inout5,$rndkey0); + &aesenc ($inout4,$rndkey1); + &$movekey ($rndkey0,&QWP(0,$key)); + &aesenc ($inout5,$rndkey1); + + &call (&label("_aesni_encrypt6_enter")); + + &movups ($rndkey1,&QWP(0,$inp)); + &movups ($rndkey0,&QWP(0x10,$inp)); + &xorps ($inout0,$rndkey1); + &movups ($rndkey1,&QWP(0x20,$inp)); + &xorps ($inout1,$rndkey0); + &movups (&QWP(0,$out),$inout0); + &movdqa ($rndkey0,&QWP(16,"esp")); # load increment + &xorps ($inout2,$rndkey1); + &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + + &paddd ($rndkey1,$rndkey0); # 1st triplet increment + &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment + &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask + + &movups ($inout1,&QWP(0x30,$inp)); + &movups ($inout2,&QWP(0x40,$inp)); + &xorps ($inout3,$inout1); + &movups ($inout1,&QWP(0x50,$inp)); + &lea ($inp,&DWP(0x60,$inp)); + &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet + &pshufb ($rndkey1,$inout0); # byte swap + &xorps ($inout4,$inout2); + &movups (&QWP(0x30,$out),$inout3); + &xorps ($inout5,$inout1); + &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet + &pshufb ($rndkey0,$inout0); # byte swap + &movups (&QWP(0x40,$out),$inout4); + &pshufd ($inout0,$rndkey1,3<<6); + &movups (&QWP(0x50,$out),$inout5); + &lea ($out,&DWP(0x60,$out)); + + &mov ($rounds,$rounds_); + &pshufd ($inout1,$rndkey1,2<<6); + &sub ($len,6); + &jnc (&label("ctr32_loop6")); + + &add ($len,6); + &jz (&label("ctr32_ret")); + &mov ($key,$key_); + &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds + &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec + +&set_label("ctr32_tail"); + &por ($inout0,$inout5); + &cmp ($len,2); + &jb (&label("ctr32_one")); + + &pshufd ($inout2,$rndkey1,1<<6); + &por ($inout1,$inout5); + &je (&label("ctr32_two")); + + &pshufd ($inout3,$rndkey0,3<<6); + &por ($inout2,$inout5); + &cmp ($len,4); + &jb (&label("ctr32_three")); + + &pshufd ($inout4,$rndkey0,2<<6); + &por ($inout3,$inout5); + &je (&label("ctr32_four")); + + &por ($inout4,$inout5); + &call ("_aesni_encrypt6"); + &movups ($rndkey1,&QWP(0,$inp)); + &movups ($rndkey0,&QWP(0x10,$inp)); + &xorps ($inout0,$rndkey1); + &movups ($rndkey1,&QWP(0x20,$inp)); + &xorps ($inout1,$rndkey0); + &movups ($rndkey0,&QWP(0x30,$inp)); + &xorps ($inout2,$rndkey1); + &movups ($rndkey1,&QWP(0x40,$inp)); + &xorps ($inout3,$rndkey0); + &movups (&QWP(0,$out),$inout0); + &xorps ($inout4,$rndkey1); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &movups (&QWP(0x40,$out),$inout4); + &jmp (&label("ctr32_ret")); + +&set_label("ctr32_one_shortcut",16); + &movups ($inout0,&QWP(0,$rounds_)); # load ivec + &mov ($rounds,&DWP(240,$key)); + +&set_label("ctr32_one"); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &movups ($in0,&QWP(0,$inp)); + &xorps ($in0,$inout0); + &movups (&QWP(0,$out),$in0); + &jmp (&label("ctr32_ret")); + +&set_label("ctr32_two",16); + &call ("_aesni_encrypt3"); + &movups ($inout3,&QWP(0,$inp)); + &movups ($inout4,&QWP(0x10,$inp)); + &xorps ($inout0,$inout3); + &xorps ($inout1,$inout4); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &jmp (&label("ctr32_ret")); + +&set_label("ctr32_three",16); + &call ("_aesni_encrypt3"); + &movups ($inout3,&QWP(0,$inp)); + &movups ($inout4,&QWP(0x10,$inp)); + &xorps ($inout0,$inout3); + &movups ($inout5,&QWP(0x20,$inp)); + &xorps ($inout1,$inout4); + &movups (&QWP(0,$out),$inout0); + &xorps ($inout2,$inout5); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &jmp (&label("ctr32_ret")); + +&set_label("ctr32_four",16); + &call ("_aesni_encrypt4"); + &movups ($inout4,&QWP(0,$inp)); + &movups ($inout5,&QWP(0x10,$inp)); + &movups ($rndkey1,&QWP(0x20,$inp)); + &xorps ($inout0,$inout4); + &movups ($rndkey0,&QWP(0x30,$inp)); + &xorps ($inout1,$inout5); + &movups (&QWP(0,$out),$inout0); + &xorps ($inout2,$rndkey1); + &movups (&QWP(0x10,$out),$inout1); + &xorps ($inout3,$rndkey0); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + +&set_label("ctr32_ret"); + &mov ("esp",&DWP(80,"esp")); +&function_end("aesni_ctr32_encrypt_blocks"); + +###################################################################### +# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2 +# const unsigned char iv[16]); +# +{ my ($tweak,$twtmp,$twres,$twmask)=($rndkey1,$rndkey0,$inout0,$inout1); + +&function_begin("aesni_xts_encrypt"); + &mov ($key,&wparam(4)); # key2 + &mov ($inp,&wparam(5)); # clear-text tweak + + &mov ($rounds,&DWP(240,$key)); # key2->rounds + &movups ($inout0,&QWP(0,$inp)); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); # key1 + + &mov ($key_,"esp"); + &sub ("esp",16*7+8); + &mov ($rounds,&DWP(240,$key)); # key1->rounds + &and ("esp",-16); # align stack + + &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant + &mov (&DWP(16*6+4,"esp"),0); + &mov (&DWP(16*6+8,"esp"),1); + &mov (&DWP(16*6+12,"esp"),0); + &mov (&DWP(16*7+0,"esp"),$len); # save original $len + &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp + + &movdqa ($tweak,$inout0); + &pxor ($twtmp,$twtmp); + &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + + &and ($len,-16); + &mov ($key_,$key); # backup $key + &mov ($rounds_,$rounds); # backup $rounds + &sub ($len,16*6); + &jc (&label("xts_enc_short")); + + &shr ($rounds,1); + &mov ($rounds_,$rounds); + &jmp (&label("xts_enc_loop6")); + +&set_label("xts_enc_loop6",16); + for ($i=0;$i<4;$i++) { + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa (&QWP(16*$i,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd ($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + } + &pshufd ($inout5,$twtmp,0x13); + &movdqa (&QWP(16*$i++,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &$movekey ($rndkey0,&QWP(0,$key_)); + &pand ($inout5,$twmask); # isolate carry and residue + &movups ($inout0,&QWP(0,$inp)); # load input + &pxor ($inout5,$tweak); + + # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] + &movdqu ($inout1,&QWP(16*1,$inp)); + &xorps ($inout0,$rndkey0); # input^=rndkey[0] + &movdqu ($inout2,&QWP(16*2,$inp)); + &pxor ($inout1,$rndkey0); + &movdqu ($inout3,&QWP(16*3,$inp)); + &pxor ($inout2,$rndkey0); + &movdqu ($inout4,&QWP(16*4,$inp)); + &pxor ($inout3,$rndkey0); + &movdqu ($rndkey1,&QWP(16*5,$inp)); + &pxor ($inout4,$rndkey0); + &lea ($inp,&DWP(16*6,$inp)); + &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak + &pxor ($inout5,$rndkey1); + + &$movekey ($rndkey1,&QWP(16,$key_)); + &lea ($key,&DWP(32,$key_)); + &pxor ($inout1,&QWP(16*1,"esp")); + &aesenc ($inout0,$rndkey1); + &pxor ($inout2,&QWP(16*2,"esp")); + &aesenc ($inout1,$rndkey1); + &pxor ($inout3,&QWP(16*3,"esp")); + &dec ($rounds); + &aesenc ($inout2,$rndkey1); + &pxor ($inout4,&QWP(16*4,"esp")); + &aesenc ($inout3,$rndkey1); + &pxor ($inout5,$rndkey0); + &aesenc ($inout4,$rndkey1); + &$movekey ($rndkey0,&QWP(0,$key)); + &aesenc ($inout5,$rndkey1); + &call (&label("_aesni_encrypt6_enter")); + + &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak + &pxor ($twtmp,$twtmp); + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &pcmpgtd ($twtmp,$tweak); # broadcast upper bits + &xorps ($inout1,&QWP(16*1,"esp")); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout2,&QWP(16*2,"esp")); + &movups (&QWP(16*1,$out),$inout1); + &xorps ($inout3,&QWP(16*3,"esp")); + &movups (&QWP(16*2,$out),$inout2); + &xorps ($inout4,&QWP(16*4,"esp")); + &movups (&QWP(16*3,$out),$inout3); + &xorps ($inout5,$tweak); + &movups (&QWP(16*4,$out),$inout4); + &pshufd ($twres,$twtmp,0x13); + &movups (&QWP(16*5,$out),$inout5); + &lea ($out,&DWP(16*6,$out)); + &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 + + &pxor ($twtmp,$twtmp); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &mov ($rounds,$rounds_); # restore $rounds + &pxor ($tweak,$twres); + + &sub ($len,16*6); + &jnc (&label("xts_enc_loop6")); + + &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds + &mov ($key,$key_); # restore $key + &mov ($rounds_,$rounds); + +&set_label("xts_enc_short"); + &add ($len,16*6); + &jz (&label("xts_enc_done6x")); + + &movdqa ($inout3,$tweak); # put aside previous tweak + &cmp ($len,0x20); + &jb (&label("xts_enc_one")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &je (&label("xts_enc_two")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa ($inout4,$tweak); # put aside previous tweak + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &cmp ($len,0x40); + &jb (&label("xts_enc_three")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa ($inout5,$tweak); # put aside previous tweak + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &movdqa (&QWP(16*0,"esp"),$inout3); + &movdqa (&QWP(16*1,"esp"),$inout4); + &je (&label("xts_enc_four")); + + &movdqa (&QWP(16*2,"esp"),$inout5); + &pshufd ($inout5,$twtmp,0x13); + &movdqa (&QWP(16*3,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($inout0,1); + &pand ($inout5,$twmask); # isolate carry and residue + &pxor ($inout5,$tweak); + + &movdqu ($inout0,&QWP(16*0,$inp)); # load input + &movdqu ($inout1,&QWP(16*1,$inp)); + &movdqu ($inout2,&QWP(16*2,$inp)); + &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movdqu ($inout3,&QWP(16*3,$inp)); + &pxor ($inout1,&QWP(16*1,"esp")); + &movdqu ($inout4,&QWP(16*4,$inp)); + &pxor ($inout2,&QWP(16*2,"esp")); + &lea ($inp,&DWP(16*5,$inp)); + &pxor ($inout3,&QWP(16*3,"esp")); + &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak + &pxor ($inout4,$inout5); + + &call ("_aesni_encrypt6"); + + &movaps ($tweak,&QWP(16*4,"esp")); # last tweak + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,&QWP(16*2,"esp")); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout3,&QWP(16*3,"esp")); + &movups (&QWP(16*1,$out),$inout1); + &xorps ($inout4,$tweak); + &movups (&QWP(16*2,$out),$inout2); + &movups (&QWP(16*3,$out),$inout3); + &movups (&QWP(16*4,$out),$inout4); + &lea ($out,&DWP(16*5,$out)); + &jmp (&label("xts_enc_done")); + +&set_label("xts_enc_one",16); + &movups ($inout0,&QWP(16*0,$inp)); # load input + &lea ($inp,&DWP(16*1,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &xorps ($inout0,$inout3); # output^=tweak + &movups (&QWP(16*0,$out),$inout0); # write output + &lea ($out,&DWP(16*1,$out)); + + &movdqa ($tweak,$inout3); # last tweak + &jmp (&label("xts_enc_done")); + +&set_label("xts_enc_two",16); + &movaps ($inout4,$tweak); # put aside last tweak + + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &lea ($inp,&DWP(16*2,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + &xorps ($inout1,$inout4); + &xorps ($inout2,$inout2); + + &call ("_aesni_encrypt3"); + + &xorps ($inout0,$inout3); # output^=tweak + &xorps ($inout1,$inout4); + &movups (&QWP(16*0,$out),$inout0); # write output + &movups (&QWP(16*1,$out),$inout1); + &lea ($out,&DWP(16*2,$out)); + + &movdqa ($tweak,$inout4); # last tweak + &jmp (&label("xts_enc_done")); + +&set_label("xts_enc_three",16); + &movaps ($inout5,$tweak); # put aside last tweak + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &movups ($inout2,&QWP(16*2,$inp)); + &lea ($inp,&DWP(16*3,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + &xorps ($inout1,$inout4); + &xorps ($inout2,$inout5); + + &call ("_aesni_encrypt3"); + + &xorps ($inout0,$inout3); # output^=tweak + &xorps ($inout1,$inout4); + &xorps ($inout2,$inout5); + &movups (&QWP(16*0,$out),$inout0); # write output + &movups (&QWP(16*1,$out),$inout1); + &movups (&QWP(16*2,$out),$inout2); + &lea ($out,&DWP(16*3,$out)); + + &movdqa ($tweak,$inout5); # last tweak + &jmp (&label("xts_enc_done")); + +&set_label("xts_enc_four",16); + &movaps ($inout4,$tweak); # put aside last tweak + + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &movups ($inout2,&QWP(16*2,$inp)); + &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movups ($inout3,&QWP(16*3,$inp)); + &lea ($inp,&DWP(16*4,$inp)); + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,$inout5); + &xorps ($inout3,$inout4); + + &call ("_aesni_encrypt4"); + + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,$inout5); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout3,$inout4); + &movups (&QWP(16*1,$out),$inout1); + &movups (&QWP(16*2,$out),$inout2); + &movups (&QWP(16*3,$out),$inout3); + &lea ($out,&DWP(16*4,$out)); + + &movdqa ($tweak,$inout4); # last tweak + &jmp (&label("xts_enc_done")); + +&set_label("xts_enc_done6x",16); # $tweak is pre-calculated + &mov ($len,&DWP(16*7+0,"esp")); # restore original $len + &and ($len,15); + &jz (&label("xts_enc_ret")); + &movdqa ($inout3,$tweak); + &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 + &jmp (&label("xts_enc_steal")); + +&set_label("xts_enc_done",16); + &mov ($len,&DWP(16*7+0,"esp")); # restore original $len + &pxor ($twtmp,$twtmp); + &and ($len,15); + &jz (&label("xts_enc_ret")); + + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 + &pshufd ($inout3,$twtmp,0x13); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue + &pxor ($inout3,$tweak); + +&set_label("xts_enc_steal"); + &movz ($rounds,&BP(0,$inp)); + &movz ($key,&BP(-16,$out)); + &lea ($inp,&DWP(1,$inp)); + &mov (&BP(-16,$out),&LB($rounds)); + &mov (&BP(0,$out),&LB($key)); + &lea ($out,&DWP(1,$out)); + &sub ($len,1); + &jnz (&label("xts_enc_steal")); + + &sub ($out,&DWP(16*7+0,"esp")); # rewind $out + &mov ($key,$key_); # restore $key + &mov ($rounds,$rounds_); # restore $rounds + + &movups ($inout0,&QWP(-16,$out)); # load input + &xorps ($inout0,$inout3); # input^=tweak + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + &xorps ($inout0,$inout3); # output^=tweak + &movups (&QWP(-16,$out),$inout0); # write output + +&set_label("xts_enc_ret"); + &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp +&function_end("aesni_xts_encrypt"); + +&function_begin("aesni_xts_decrypt"); + &mov ($key,&wparam(4)); # key2 + &mov ($inp,&wparam(5)); # clear-text tweak + + &mov ($rounds,&DWP(240,$key)); # key2->rounds + &movups ($inout0,&QWP(0,$inp)); + if ($inline) + { &aesni_inline_generate1("enc"); } + else + { &call ("_aesni_encrypt1"); } + + &mov ($inp,&wparam(0)); + &mov ($out,&wparam(1)); + &mov ($len,&wparam(2)); + &mov ($key,&wparam(3)); # key1 + + &mov ($key_,"esp"); + &sub ("esp",16*7+8); + &and ("esp",-16); # align stack + + &xor ($rounds_,$rounds_); # if(len%16) len-=16; + &test ($len,15); + &setnz (&LB($rounds_)); + &shl ($rounds_,4); + &sub ($len,$rounds_); + + &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant + &mov (&DWP(16*6+4,"esp"),0); + &mov (&DWP(16*6+8,"esp"),1); + &mov (&DWP(16*6+12,"esp"),0); + &mov (&DWP(16*7+0,"esp"),$len); # save original $len + &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp + + &mov ($rounds,&DWP(240,$key)); # key1->rounds + &mov ($key_,$key); # backup $key + &mov ($rounds_,$rounds); # backup $rounds + + &movdqa ($tweak,$inout0); + &pxor ($twtmp,$twtmp); + &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + + &and ($len,-16); + &sub ($len,16*6); + &jc (&label("xts_dec_short")); + + &shr ($rounds,1); + &mov ($rounds_,$rounds); + &jmp (&label("xts_dec_loop6")); + +&set_label("xts_dec_loop6",16); + for ($i=0;$i<4;$i++) { + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa (&QWP(16*$i,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd ($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + } + &pshufd ($inout5,$twtmp,0x13); + &movdqa (&QWP(16*$i++,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &$movekey ($rndkey0,&QWP(0,$key_)); + &pand ($inout5,$twmask); # isolate carry and residue + &movups ($inout0,&QWP(0,$inp)); # load input + &pxor ($inout5,$tweak); + + # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] + &movdqu ($inout1,&QWP(16*1,$inp)); + &xorps ($inout0,$rndkey0); # input^=rndkey[0] + &movdqu ($inout2,&QWP(16*2,$inp)); + &pxor ($inout1,$rndkey0); + &movdqu ($inout3,&QWP(16*3,$inp)); + &pxor ($inout2,$rndkey0); + &movdqu ($inout4,&QWP(16*4,$inp)); + &pxor ($inout3,$rndkey0); + &movdqu ($rndkey1,&QWP(16*5,$inp)); + &pxor ($inout4,$rndkey0); + &lea ($inp,&DWP(16*6,$inp)); + &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak + &pxor ($inout5,$rndkey1); + + &$movekey ($rndkey1,&QWP(16,$key_)); + &lea ($key,&DWP(32,$key_)); + &pxor ($inout1,&QWP(16*1,"esp")); + &aesdec ($inout0,$rndkey1); + &pxor ($inout2,&QWP(16*2,"esp")); + &aesdec ($inout1,$rndkey1); + &pxor ($inout3,&QWP(16*3,"esp")); + &dec ($rounds); + &aesdec ($inout2,$rndkey1); + &pxor ($inout4,&QWP(16*4,"esp")); + &aesdec ($inout3,$rndkey1); + &pxor ($inout5,$rndkey0); + &aesdec ($inout4,$rndkey1); + &$movekey ($rndkey0,&QWP(0,$key)); + &aesdec ($inout5,$rndkey1); + &call (&label("_aesni_decrypt6_enter")); + + &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak + &pxor ($twtmp,$twtmp); + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &pcmpgtd ($twtmp,$tweak); # broadcast upper bits + &xorps ($inout1,&QWP(16*1,"esp")); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout2,&QWP(16*2,"esp")); + &movups (&QWP(16*1,$out),$inout1); + &xorps ($inout3,&QWP(16*3,"esp")); + &movups (&QWP(16*2,$out),$inout2); + &xorps ($inout4,&QWP(16*4,"esp")); + &movups (&QWP(16*3,$out),$inout3); + &xorps ($inout5,$tweak); + &movups (&QWP(16*4,$out),$inout4); + &pshufd ($twres,$twtmp,0x13); + &movups (&QWP(16*5,$out),$inout5); + &lea ($out,&DWP(16*6,$out)); + &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 + + &pxor ($twtmp,$twtmp); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &mov ($rounds,$rounds_); # restore $rounds + &pxor ($tweak,$twres); + + &sub ($len,16*6); + &jnc (&label("xts_dec_loop6")); + + &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds + &mov ($key,$key_); # restore $key + &mov ($rounds_,$rounds); + +&set_label("xts_dec_short"); + &add ($len,16*6); + &jz (&label("xts_dec_done6x")); + + &movdqa ($inout3,$tweak); # put aside previous tweak + &cmp ($len,0x20); + &jb (&label("xts_dec_one")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &je (&label("xts_dec_two")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa ($inout4,$tweak); # put aside previous tweak + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &cmp ($len,0x40); + &jb (&label("xts_dec_three")); + + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa ($inout5,$tweak); # put aside previous tweak + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + &movdqa (&QWP(16*0,"esp"),$inout3); + &movdqa (&QWP(16*1,"esp"),$inout4); + &je (&label("xts_dec_four")); + + &movdqa (&QWP(16*2,"esp"),$inout5); + &pshufd ($inout5,$twtmp,0x13); + &movdqa (&QWP(16*3,"esp"),$tweak); + &paddq ($tweak,$tweak); # &psllq($inout0,1); + &pand ($inout5,$twmask); # isolate carry and residue + &pxor ($inout5,$tweak); + + &movdqu ($inout0,&QWP(16*0,$inp)); # load input + &movdqu ($inout1,&QWP(16*1,$inp)); + &movdqu ($inout2,&QWP(16*2,$inp)); + &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movdqu ($inout3,&QWP(16*3,$inp)); + &pxor ($inout1,&QWP(16*1,"esp")); + &movdqu ($inout4,&QWP(16*4,$inp)); + &pxor ($inout2,&QWP(16*2,"esp")); + &lea ($inp,&DWP(16*5,$inp)); + &pxor ($inout3,&QWP(16*3,"esp")); + &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak + &pxor ($inout4,$inout5); + + &call ("_aesni_decrypt6"); + + &movaps ($tweak,&QWP(16*4,"esp")); # last tweak + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,&QWP(16*2,"esp")); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout3,&QWP(16*3,"esp")); + &movups (&QWP(16*1,$out),$inout1); + &xorps ($inout4,$tweak); + &movups (&QWP(16*2,$out),$inout2); + &movups (&QWP(16*3,$out),$inout3); + &movups (&QWP(16*4,$out),$inout4); + &lea ($out,&DWP(16*5,$out)); + &jmp (&label("xts_dec_done")); + +&set_label("xts_dec_one",16); + &movups ($inout0,&QWP(16*0,$inp)); # load input + &lea ($inp,&DWP(16*1,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &xorps ($inout0,$inout3); # output^=tweak + &movups (&QWP(16*0,$out),$inout0); # write output + &lea ($out,&DWP(16*1,$out)); + + &movdqa ($tweak,$inout3); # last tweak + &jmp (&label("xts_dec_done")); + +&set_label("xts_dec_two",16); + &movaps ($inout4,$tweak); # put aside last tweak + + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &lea ($inp,&DWP(16*2,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + &xorps ($inout1,$inout4); + + &call ("_aesni_decrypt3"); + + &xorps ($inout0,$inout3); # output^=tweak + &xorps ($inout1,$inout4); + &movups (&QWP(16*0,$out),$inout0); # write output + &movups (&QWP(16*1,$out),$inout1); + &lea ($out,&DWP(16*2,$out)); + + &movdqa ($tweak,$inout4); # last tweak + &jmp (&label("xts_dec_done")); + +&set_label("xts_dec_three",16); + &movaps ($inout5,$tweak); # put aside last tweak + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &movups ($inout2,&QWP(16*2,$inp)); + &lea ($inp,&DWP(16*3,$inp)); + &xorps ($inout0,$inout3); # input^=tweak + &xorps ($inout1,$inout4); + &xorps ($inout2,$inout5); + + &call ("_aesni_decrypt3"); + + &xorps ($inout0,$inout3); # output^=tweak + &xorps ($inout1,$inout4); + &xorps ($inout2,$inout5); + &movups (&QWP(16*0,$out),$inout0); # write output + &movups (&QWP(16*1,$out),$inout1); + &movups (&QWP(16*2,$out),$inout2); + &lea ($out,&DWP(16*3,$out)); + + &movdqa ($tweak,$inout5); # last tweak + &jmp (&label("xts_dec_done")); + +&set_label("xts_dec_four",16); + &movaps ($inout4,$tweak); # put aside last tweak + + &movups ($inout0,&QWP(16*0,$inp)); # load input + &movups ($inout1,&QWP(16*1,$inp)); + &movups ($inout2,&QWP(16*2,$inp)); + &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak + &movups ($inout3,&QWP(16*3,$inp)); + &lea ($inp,&DWP(16*4,$inp)); + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,$inout5); + &xorps ($inout3,$inout4); + + &call ("_aesni_decrypt4"); + + &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak + &xorps ($inout1,&QWP(16*1,"esp")); + &xorps ($inout2,$inout5); + &movups (&QWP(16*0,$out),$inout0); # write output + &xorps ($inout3,$inout4); + &movups (&QWP(16*1,$out),$inout1); + &movups (&QWP(16*2,$out),$inout2); + &movups (&QWP(16*3,$out),$inout3); + &lea ($out,&DWP(16*4,$out)); + + &movdqa ($tweak,$inout4); # last tweak + &jmp (&label("xts_dec_done")); + +&set_label("xts_dec_done6x",16); # $tweak is pre-calculated + &mov ($len,&DWP(16*7+0,"esp")); # restore original $len + &and ($len,15); + &jz (&label("xts_dec_ret")); + &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 + &jmp (&label("xts_dec_only_one_more")); + +&set_label("xts_dec_done",16); + &mov ($len,&DWP(16*7+0,"esp")); # restore original $len + &pxor ($twtmp,$twtmp); + &and ($len,15); + &jz (&label("xts_dec_ret")); + + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 + &pshufd ($twres,$twtmp,0x13); + &pxor ($twtmp,$twtmp); + &movdqa ($twmask,&QWP(16*6,"esp")); + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($twres,$twmask); # isolate carry and residue + &pcmpgtd($twtmp,$tweak); # broadcast upper bits + &pxor ($tweak,$twres); + +&set_label("xts_dec_only_one_more"); + &pshufd ($inout3,$twtmp,0x13); + &movdqa ($inout4,$tweak); # put aside previous tweak + &paddq ($tweak,$tweak); # &psllq($tweak,1); + &pand ($inout3,$twmask); # isolate carry and residue + &pxor ($inout3,$tweak); + + &mov ($key,$key_); # restore $key + &mov ($rounds,$rounds_); # restore $rounds + + &movups ($inout0,&QWP(0,$inp)); # load input + &xorps ($inout0,$inout3); # input^=tweak + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &xorps ($inout0,$inout3); # output^=tweak + &movups (&QWP(0,$out),$inout0); # write output + +&set_label("xts_dec_steal"); + &movz ($rounds,&BP(16,$inp)); + &movz ($key,&BP(0,$out)); + &lea ($inp,&DWP(1,$inp)); + &mov (&BP(0,$out),&LB($rounds)); + &mov (&BP(16,$out),&LB($key)); + &lea ($out,&DWP(1,$out)); + &sub ($len,1); + &jnz (&label("xts_dec_steal")); + + &sub ($out,&DWP(16*7+0,"esp")); # rewind $out + &mov ($key,$key_); # restore $key + &mov ($rounds,$rounds_); # restore $rounds + + &movups ($inout0,&QWP(0,$out)); # load input + &xorps ($inout0,$inout4); # input^=tweak + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &xorps ($inout0,$inout4); # output^=tweak + &movups (&QWP(0,$out),$inout0); # write output + +&set_label("xts_dec_ret"); + &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp +&function_end("aesni_xts_decrypt"); +} +} + +###################################################################### +# void $PREFIX_cbc_encrypt (const void *inp, void *out, +# size_t length, const AES_KEY *key, +# unsigned char *ivp,const int enc); +&function_begin("${PREFIX}_cbc_encrypt"); + &mov ($inp,&wparam(0)); + &mov ($rounds_,"esp"); + &mov ($out,&wparam(1)); + &sub ($rounds_,24); + &mov ($len,&wparam(2)); + &and ($rounds_,-16); + &mov ($key,&wparam(3)); + &mov ($key_,&wparam(4)); + &test ($len,$len); + &jz (&label("cbc_abort")); + + &cmp (&wparam(5),0); + &xchg ($rounds_,"esp"); # alloca + &movups ($ivec,&QWP(0,$key_)); # load IV + &mov ($rounds,&DWP(240,$key)); + &mov ($key_,$key); # backup $key + &mov (&DWP(16,"esp"),$rounds_); # save original %esp + &mov ($rounds_,$rounds); # backup $rounds + &je (&label("cbc_decrypt")); + + &movaps ($inout0,$ivec); + &cmp ($len,16); + &jb (&label("cbc_enc_tail")); + &sub ($len,16); + &jmp (&label("cbc_enc_loop")); + +&set_label("cbc_enc_loop",16); + &movups ($ivec,&QWP(0,$inp)); # input actually + &lea ($inp,&DWP(16,$inp)); + if ($inline) + { &aesni_inline_generate1("enc",$inout0,$ivec); } + else + { &xorps($inout0,$ivec); &call("_aesni_encrypt1"); } + &mov ($rounds,$rounds_); # restore $rounds + &mov ($key,$key_); # restore $key + &movups (&QWP(0,$out),$inout0); # store output + &lea ($out,&DWP(16,$out)); + &sub ($len,16); + &jnc (&label("cbc_enc_loop")); + &add ($len,16); + &jnz (&label("cbc_enc_tail")); + &movaps ($ivec,$inout0); + &jmp (&label("cbc_ret")); + +&set_label("cbc_enc_tail"); + &mov ("ecx",$len); # zaps $rounds + &data_word(0xA4F3F689); # rep movsb + &mov ("ecx",16); # zero tail + &sub ("ecx",$len); + &xor ("eax","eax"); # zaps $len + &data_word(0xAAF3F689); # rep stosb + &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block + &mov ($rounds,$rounds_); # restore $rounds + &mov ($inp,$out); # $inp and $out are the same + &mov ($key,$key_); # restore $key + &jmp (&label("cbc_enc_loop")); +###################################################################### +&set_label("cbc_decrypt",16); + &cmp ($len,0x50); + &jbe (&label("cbc_dec_tail")); + &movaps (&QWP(0,"esp"),$ivec); # save IV + &sub ($len,0x50); + &jmp (&label("cbc_dec_loop6_enter")); + +&set_label("cbc_dec_loop6",16); + &movaps (&QWP(0,"esp"),$rndkey0); # save IV + &movups (&QWP(0,$out),$inout5); + &lea ($out,&DWP(0x10,$out)); +&set_label("cbc_dec_loop6_enter"); + &movdqu ($inout0,&QWP(0,$inp)); + &movdqu ($inout1,&QWP(0x10,$inp)); + &movdqu ($inout2,&QWP(0x20,$inp)); + &movdqu ($inout3,&QWP(0x30,$inp)); + &movdqu ($inout4,&QWP(0x40,$inp)); + &movdqu ($inout5,&QWP(0x50,$inp)); + + &call ("_aesni_decrypt6"); + + &movups ($rndkey1,&QWP(0,$inp)); + &movups ($rndkey0,&QWP(0x10,$inp)); + &xorps ($inout0,&QWP(0,"esp")); # ^=IV + &xorps ($inout1,$rndkey1); + &movups ($rndkey1,&QWP(0x20,$inp)); + &xorps ($inout2,$rndkey0); + &movups ($rndkey0,&QWP(0x30,$inp)); + &xorps ($inout3,$rndkey1); + &movups ($rndkey1,&QWP(0x40,$inp)); + &xorps ($inout4,$rndkey0); + &movups ($rndkey0,&QWP(0x50,$inp)); # IV + &xorps ($inout5,$rndkey1); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &lea ($inp,&DWP(0x60,$inp)); + &movups (&QWP(0x20,$out),$inout2); + &mov ($rounds,$rounds_) # restore $rounds + &movups (&QWP(0x30,$out),$inout3); + &mov ($key,$key_); # restore $key + &movups (&QWP(0x40,$out),$inout4); + &lea ($out,&DWP(0x50,$out)); + &sub ($len,0x60); + &ja (&label("cbc_dec_loop6")); + + &movaps ($inout0,$inout5); + &movaps ($ivec,$rndkey0); + &add ($len,0x50); + &jle (&label("cbc_dec_tail_collected")); + &movups (&QWP(0,$out),$inout0); + &lea ($out,&DWP(0x10,$out)); +&set_label("cbc_dec_tail"); + &movups ($inout0,&QWP(0,$inp)); + &movaps ($in0,$inout0); + &cmp ($len,0x10); + &jbe (&label("cbc_dec_one")); + + &movups ($inout1,&QWP(0x10,$inp)); + &movaps ($in1,$inout1); + &cmp ($len,0x20); + &jbe (&label("cbc_dec_two")); + + &movups ($inout2,&QWP(0x20,$inp)); + &cmp ($len,0x30); + &jbe (&label("cbc_dec_three")); + + &movups ($inout3,&QWP(0x30,$inp)); + &cmp ($len,0x40); + &jbe (&label("cbc_dec_four")); + + &movups ($inout4,&QWP(0x40,$inp)); + &movaps (&QWP(0,"esp"),$ivec); # save IV + &movups ($inout0,&QWP(0,$inp)); + &xorps ($inout5,$inout5); + &call ("_aesni_decrypt6"); + &movups ($rndkey1,&QWP(0,$inp)); + &movups ($rndkey0,&QWP(0x10,$inp)); + &xorps ($inout0,&QWP(0,"esp")); # ^= IV + &xorps ($inout1,$rndkey1); + &movups ($rndkey1,&QWP(0x20,$inp)); + &xorps ($inout2,$rndkey0); + &movups ($rndkey0,&QWP(0x30,$inp)); + &xorps ($inout3,$rndkey1); + &movups ($ivec,&QWP(0x40,$inp)); # IV + &xorps ($inout4,$rndkey0); + &movups (&QWP(0,$out),$inout0); + &movups (&QWP(0x10,$out),$inout1); + &movups (&QWP(0x20,$out),$inout2); + &movups (&QWP(0x30,$out),$inout3); + &lea ($out,&DWP(0x40,$out)); + &movaps ($inout0,$inout4); + &sub ($len,0x50); + &jmp (&label("cbc_dec_tail_collected")); + +&set_label("cbc_dec_one",16); + if ($inline) + { &aesni_inline_generate1("dec"); } + else + { &call ("_aesni_decrypt1"); } + &xorps ($inout0,$ivec); + &movaps ($ivec,$in0); + &sub ($len,0x10); + &jmp (&label("cbc_dec_tail_collected")); + +&set_label("cbc_dec_two",16); + &xorps ($inout2,$inout2); + &call ("_aesni_decrypt3"); + &xorps ($inout0,$ivec); + &xorps ($inout1,$in0); + &movups (&QWP(0,$out),$inout0); + &movaps ($inout0,$inout1); + &lea ($out,&DWP(0x10,$out)); + &movaps ($ivec,$in1); + &sub ($len,0x20); + &jmp (&label("cbc_dec_tail_collected")); + +&set_label("cbc_dec_three",16); + &call ("_aesni_decrypt3"); + &xorps ($inout0,$ivec); + &xorps ($inout1,$in0); + &xorps ($inout2,$in1); + &movups (&QWP(0,$out),$inout0); + &movaps ($inout0,$inout2); + &movups (&QWP(0x10,$out),$inout1); + &lea ($out,&DWP(0x20,$out)); + &movups ($ivec,&QWP(0x20,$inp)); + &sub ($len,0x30); + &jmp (&label("cbc_dec_tail_collected")); + +&set_label("cbc_dec_four",16); + &call ("_aesni_decrypt4"); + &movups ($rndkey1,&QWP(0x10,$inp)); + &movups ($rndkey0,&QWP(0x20,$inp)); + &xorps ($inout0,$ivec); + &movups ($ivec,&QWP(0x30,$inp)); + &xorps ($inout1,$in0); + &movups (&QWP(0,$out),$inout0); + &xorps ($inout2,$rndkey1); + &movups (&QWP(0x10,$out),$inout1); + &xorps ($inout3,$rndkey0); + &movups (&QWP(0x20,$out),$inout2); + &lea ($out,&DWP(0x30,$out)); + &movaps ($inout0,$inout3); + &sub ($len,0x40); + +&set_label("cbc_dec_tail_collected"); + &and ($len,15); + &jnz (&label("cbc_dec_tail_partial")); + &movups (&QWP(0,$out),$inout0); + &jmp (&label("cbc_ret")); + +&set_label("cbc_dec_tail_partial",16); + &movaps (&QWP(0,"esp"),$inout0); + &mov ("ecx",16); + &mov ($inp,"esp"); + &sub ("ecx",$len); + &data_word(0xA4F3F689); # rep movsb + +&set_label("cbc_ret"); + &mov ("esp",&DWP(16,"esp")); # pull original %esp + &mov ($key_,&wparam(4)); + &movups (&QWP(0,$key_),$ivec); # output IV +&set_label("cbc_abort"); +&function_end("${PREFIX}_cbc_encrypt"); + +###################################################################### +# Mechanical port from aesni-x86_64.pl. +# +# _aesni_set_encrypt_key is private interface, +# input: +# "eax" const unsigned char *userKey +# $rounds int bits +# $key AES_KEY *key +# output: +# "eax" return code +# $round rounds + +&function_begin_B("_aesni_set_encrypt_key"); + &test ("eax","eax"); + &jz (&label("bad_pointer")); + &test ($key,$key); + &jz (&label("bad_pointer")); + + &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey + &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 + &lea ($key,&DWP(16,$key)); + &cmp ($rounds,256); + &je (&label("14rounds")); + &cmp ($rounds,192); + &je (&label("12rounds")); + &cmp ($rounds,128); + &jne (&label("bad_keybits")); + +&set_label("10rounds",16); + &mov ($rounds,9); + &$movekey (&QWP(-16,$key),"xmm0"); # round 0 + &aeskeygenassist("xmm1","xmm0",0x01); # round 1 + &call (&label("key_128_cold")); + &aeskeygenassist("xmm1","xmm0",0x2); # round 2 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x04); # round 3 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x08); # round 4 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x10); # round 5 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x20); # round 6 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x40); # round 7 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x80); # round 8 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x1b); # round 9 + &call (&label("key_128")); + &aeskeygenassist("xmm1","xmm0",0x36); # round 10 + &call (&label("key_128")); + &$movekey (&QWP(0,$key),"xmm0"); + &mov (&DWP(80,$key),$rounds); + &xor ("eax","eax"); + &ret(); + +&set_label("key_128",16); + &$movekey (&QWP(0,$key),"xmm0"); + &lea ($key,&DWP(16,$key)); +&set_label("key_128_cold"); + &shufps ("xmm4","xmm0",0b00010000); + &xorps ("xmm0","xmm4"); + &shufps ("xmm4","xmm0",0b10001100); + &xorps ("xmm0","xmm4"); + &shufps ("xmm1","xmm1",0b11111111); # critical path + &xorps ("xmm0","xmm1"); + &ret(); + +&set_label("12rounds",16); + &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey + &mov ($rounds,11); + &$movekey (&QWP(-16,$key),"xmm0") # round 0 + &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 + &call (&label("key_192a_cold")); + &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 + &call (&label("key_192b")); + &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5 + &call (&label("key_192a")); + &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6 + &call (&label("key_192b")); + &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8 + &call (&label("key_192a")); + &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9 + &call (&label("key_192b")); + &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11 + &call (&label("key_192a")); + &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12 + &call (&label("key_192b")); + &$movekey (&QWP(0,$key),"xmm0"); + &mov (&DWP(48,$key),$rounds); + &xor ("eax","eax"); + &ret(); + +&set_label("key_192a",16); + &$movekey (&QWP(0,$key),"xmm0"); + &lea ($key,&DWP(16,$key)); +&set_label("key_192a_cold",16); + &movaps ("xmm5","xmm2"); +&set_label("key_192b_warm"); + &shufps ("xmm4","xmm0",0b00010000); + &movdqa ("xmm3","xmm2"); + &xorps ("xmm0","xmm4"); + &shufps ("xmm4","xmm0",0b10001100); + &pslldq ("xmm3",4); + &xorps ("xmm0","xmm4"); + &pshufd ("xmm1","xmm1",0b01010101); # critical path + &pxor ("xmm2","xmm3"); + &pxor ("xmm0","xmm1"); + &pshufd ("xmm3","xmm0",0b11111111); + &pxor ("xmm2","xmm3"); + &ret(); + +&set_label("key_192b",16); + &movaps ("xmm3","xmm0"); + &shufps ("xmm5","xmm0",0b01000100); + &$movekey (&QWP(0,$key),"xmm5"); + &shufps ("xmm3","xmm2",0b01001110); + &$movekey (&QWP(16,$key),"xmm3"); + &lea ($key,&DWP(32,$key)); + &jmp (&label("key_192b_warm")); + +&set_label("14rounds",16); + &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey + &mov ($rounds,13); + &lea ($key,&DWP(16,$key)); + &$movekey (&QWP(-32,$key),"xmm0"); # round 0 + &$movekey (&QWP(-16,$key),"xmm2"); # round 1 + &aeskeygenassist("xmm1","xmm2",0x01); # round 2 + &call (&label("key_256a_cold")); + &aeskeygenassist("xmm1","xmm0",0x01); # round 3 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x02); # round 4 + &call (&label("key_256a")); + &aeskeygenassist("xmm1","xmm0",0x02); # round 5 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x04); # round 6 + &call (&label("key_256a")); + &aeskeygenassist("xmm1","xmm0",0x04); # round 7 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x08); # round 8 + &call (&label("key_256a")); + &aeskeygenassist("xmm1","xmm0",0x08); # round 9 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x10); # round 10 + &call (&label("key_256a")); + &aeskeygenassist("xmm1","xmm0",0x10); # round 11 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x20); # round 12 + &call (&label("key_256a")); + &aeskeygenassist("xmm1","xmm0",0x20); # round 13 + &call (&label("key_256b")); + &aeskeygenassist("xmm1","xmm2",0x40); # round 14 + &call (&label("key_256a")); + &$movekey (&QWP(0,$key),"xmm0"); + &mov (&DWP(16,$key),$rounds); + &xor ("eax","eax"); + &ret(); + +&set_label("key_256a",16); + &$movekey (&QWP(0,$key),"xmm2"); + &lea ($key,&DWP(16,$key)); +&set_label("key_256a_cold"); + &shufps ("xmm4","xmm0",0b00010000); + &xorps ("xmm0","xmm4"); + &shufps ("xmm4","xmm0",0b10001100); + &xorps ("xmm0","xmm4"); + &shufps ("xmm1","xmm1",0b11111111); # critical path + &xorps ("xmm0","xmm1"); + &ret(); + +&set_label("key_256b",16); + &$movekey (&QWP(0,$key),"xmm0"); + &lea ($key,&DWP(16,$key)); + + &shufps ("xmm4","xmm2",0b00010000); + &xorps ("xmm2","xmm4"); + &shufps ("xmm4","xmm2",0b10001100); + &xorps ("xmm2","xmm4"); + &shufps ("xmm1","xmm1",0b10101010); # critical path + &xorps ("xmm2","xmm1"); + &ret(); + +&set_label("bad_pointer",4); + &mov ("eax",-1); + &ret (); +&set_label("bad_keybits",4); + &mov ("eax",-2); + &ret (); +&function_end_B("_aesni_set_encrypt_key"); + +# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits, +# AES_KEY *key) +&function_begin_B("${PREFIX}_set_encrypt_key"); + &mov ("eax",&wparam(0)); + &mov ($rounds,&wparam(1)); + &mov ($key,&wparam(2)); + &call ("_aesni_set_encrypt_key"); + &ret (); +&function_end_B("${PREFIX}_set_encrypt_key"); + +# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits, +# AES_KEY *key) +&function_begin_B("${PREFIX}_set_decrypt_key"); + &mov ("eax",&wparam(0)); + &mov ($rounds,&wparam(1)); + &mov ($key,&wparam(2)); + &call ("_aesni_set_encrypt_key"); + &mov ($key,&wparam(2)); + &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key + &test ("eax","eax"); + &jnz (&label("dec_key_ret")); + &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule + + &$movekey ("xmm0",&QWP(0,$key)); # just swap + &$movekey ("xmm1",&QWP(0,"eax")); + &$movekey (&QWP(0,"eax"),"xmm0"); + &$movekey (&QWP(0,$key),"xmm1"); + &lea ($key,&DWP(16,$key)); + &lea ("eax",&DWP(-16,"eax")); + +&set_label("dec_key_inverse"); + &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse + &$movekey ("xmm1",&QWP(0,"eax")); + &aesimc ("xmm0","xmm0"); + &aesimc ("xmm1","xmm1"); + &lea ($key,&DWP(16,$key)); + &lea ("eax",&DWP(-16,"eax")); + &$movekey (&QWP(16,"eax"),"xmm0"); + &$movekey (&QWP(-16,$key),"xmm1"); + &cmp ("eax",$key); + &ja (&label("dec_key_inverse")); + + &$movekey ("xmm0",&QWP(0,$key)); # inverse middle + &aesimc ("xmm0","xmm0"); + &$movekey (&QWP(0,$key),"xmm0"); + + &xor ("eax","eax"); # return success +&set_label("dec_key_ret"); + &ret (); +&function_end_B("${PREFIX}_set_decrypt_key"); +&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); + +&asm_finish(); diff --git a/app/openssl/crypto/aes/asm/aesni-x86_64.S b/app/openssl/crypto/aes/asm/aesni-x86_64.S new file mode 100644 index 00000000..917c8323 --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-x86_64.S @@ -0,0 +1,2535 @@ +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 + movups (%rcx),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 + movups (%rcx),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%rcx),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%rcx),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + movups (%rcx),%xmm0 +.byte 102,15,56,220,249 + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lenc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + movups (%rcx),%xmm0 +.byte 102,15,56,222,249 + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.Ldec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 + jmp .Lenc_loop8_enter +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 +.Lenc_loop8_enter: +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups (%rcx),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + jmp .Ldec_loop8_enter +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 +.Ldec_loop8_enter: +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups (%rcx),%xmm0 + jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +.Lecb_ret: + .byte 0xf3,0xc3 +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + shrl $1,%eax + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz .Lccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm6,%xmm9 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz .Lccm64_enc_outer + + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 +.byte 102,15,56,221,209 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 16(%rdi),%rdi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 +.byte 102,15,56,221,217 + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + je .Lctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + jmp .Lctr32_enc_loop6_enter +.align 16 +.Lctr32_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 + +.byte 102,15,56,220,209 + paddd %xmm13,%xmm12 +.byte 102,15,56,220,217 + paddd -24(%rsp),%xmm13 +.byte 102,15,56,220,225 + movdqa %xmm12,-40(%rsp) +.byte 102,15,56,220,233 + movdqa %xmm13,-24(%rsp) +.byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 +.byte 102,15,56,220,249 +.byte 102,69,15,56,0,239 + +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +.Lctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +.Lctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +.Lctr32_done: + .byte 0xf3,0xc3 +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_8: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 +.byte 102,68,15,56,221,249 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_enc_grandloop + +.align 16 +.Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.align 16 +.Lxts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lxts_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lxts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 +.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,208 + pand %xmm8,%xmm9 +.byte 102,15,56,220,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 +.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_enc_short: + addq $96,%rdx + jz .Lxts_enc_done + + cmpq $32,%rdx + jb .Lxts_enc_one + je .Lxts_enc_two + + cmpq $64,%rdx + jb .Lxts_enc_three + je .Lxts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + leaq 104(%rsp),%rsp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_11: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 +.byte 102,68,15,56,221,249 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_dec_grandloop + +.align 16 +.Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter + +.align 16 +.Lxts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.Lxts_dec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz .Lxts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,208 + pand %xmm8,%xmm9 +.byte 102,15,56,222,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,208 + pand %xmm8,%xmm9 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_dec_short: + addq $96,%rdx + jz .Lxts_dec_done + + cmpq $32,%rdx + jb .Lxts_dec_one + je .Lxts_dec_two + + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + leaq 104(%rsp),%rsp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe .Lcbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movaps %xmm0,-24(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe .Lcbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,-24(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq -24(%rsp),%rsi +.long 0x9066A4F3 + +.Lcbc_dec_ret: +.Lcbc_ret: + .byte 0xf3,0xc3 +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%rdi) +.Ldec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/app/openssl/crypto/aes/asm/aesni-x86_64.pl b/app/openssl/crypto/aes/asm/aesni-x86_64.pl new file mode 100644 index 00000000..0dbb194b --- /dev/null +++ b/app/openssl/crypto/aes/asm/aesni-x86_64.pl @@ -0,0 +1,3069 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for Intel AES-NI extension. In +# OpenSSL context it's used with Intel engine, but can also be used as +# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for +# details]. +# +# Performance. +# +# Given aes(enc|dec) instructions' latency asymptotic performance for +# non-parallelizable modes such as CBC encrypt is 3.75 cycles per byte +# processed with 128-bit key. And given their throughput asymptotic +# performance for parallelizable modes is 1.25 cycles per byte. Being +# asymptotic limit it's not something you commonly achieve in reality, +# but how close does one get? Below are results collected for +# different modes and block sized. Pairs of numbers are for en-/ +# decryption. +# +# 16-byte 64-byte 256-byte 1-KB 8-KB +# ECB 4.25/4.25 1.38/1.38 1.28/1.28 1.26/1.26 1.26/1.26 +# CTR 5.42/5.42 1.92/1.92 1.44/1.44 1.28/1.28 1.26/1.26 +# CBC 4.38/4.43 4.15/1.43 4.07/1.32 4.07/1.29 4.06/1.28 +# CCM 5.66/9.42 4.42/5.41 4.16/4.40 4.09/4.15 4.06/4.07 +# OFB 5.42/5.42 4.64/4.64 4.44/4.44 4.39/4.39 4.38/4.38 +# CFB 5.73/5.85 5.56/5.62 5.48/5.56 5.47/5.55 5.47/5.55 +# +# ECB, CTR, CBC and CCM results are free from EVP overhead. This means +# that otherwise used 'openssl speed -evp aes-128-??? -engine aesni +# [-decrypt]' will exhibit 10-15% worse results for smaller blocks. +# The results were collected with specially crafted speed.c benchmark +# in order to compare them with results reported in "Intel Advanced +# Encryption Standard (AES) New Instruction Set" White Paper Revision +# 3.0 dated May 2010. All above results are consistently better. This +# module also provides better performance for block sizes smaller than +# 128 bytes in points *not* represented in the above table. +# +# Looking at the results for 8-KB buffer. +# +# CFB and OFB results are far from the limit, because implementation +# uses "generic" CRYPTO_[c|o]fb128_encrypt interfaces relying on +# single-block aesni_encrypt, which is not the most optimal way to go. +# CBC encrypt result is unexpectedly high and there is no documented +# explanation for it. Seemingly there is a small penalty for feeding +# the result back to AES unit the way it's done in CBC mode. There is +# nothing one can do and the result appears optimal. CCM result is +# identical to CBC, because CBC-MAC is essentially CBC encrypt without +# saving output. CCM CTR "stays invisible," because it's neatly +# interleaved wih CBC-MAC. This provides ~30% improvement over +# "straghtforward" CCM implementation with CTR and CBC-MAC performed +# disjointly. Parallelizable modes practically achieve the theoretical +# limit. +# +# Looking at how results vary with buffer size. +# +# Curves are practically saturated at 1-KB buffer size. In most cases +# "256-byte" performance is >95%, and "64-byte" is ~90% of "8-KB" one. +# CTR curve doesn't follow this pattern and is "slowest" changing one +# with "256-byte" result being 87% of "8-KB." This is because overhead +# in CTR mode is most computationally intensive. Small-block CCM +# decrypt is slower than encrypt, because first CTR and last CBC-MAC +# iterations can't be interleaved. +# +# Results for 192- and 256-bit keys. +# +# EVP-free results were observed to scale perfectly with number of +# rounds for larger block sizes, i.e. 192-bit result being 10/12 times +# lower and 256-bit one - 10/14. Well, in CBC encrypt case differences +# are a tad smaller, because the above mentioned penalty biases all +# results by same constant value. In similar way function call +# overhead affects small-block performance, as well as OFB and CFB +# results. Differences are not large, most common coefficients are +# 10/11.7 and 10/13.4 (as opposite to 10/12.0 and 10/14.0), but one +# observe even 10/11.2 and 10/12.4 (CTR, OFB, CFB)... + +# January 2011 +# +# While Westmere processor features 6 cycles latency for aes[enc|dec] +# instructions, which can be scheduled every second cycle, Sandy +# Bridge spends 8 cycles per instruction, but it can schedule them +# every cycle. This means that code targeting Westmere would perform +# suboptimally on Sandy Bridge. Therefore this update. +# +# In addition, non-parallelizable CBC encrypt (as well as CCM) is +# optimized. Relative improvement might appear modest, 8% on Westmere, +# but in absolute terms it's 3.77 cycles per byte encrypted with +# 128-bit key on Westmere, and 5.07 - on Sandy Bridge. These numbers +# should be compared to asymptotic limits of 3.75 for Westmere and +# 5.00 for Sandy Bridge. Actually, the fact that they get this close +# to asymptotic limits is quite amazing. Indeed, the limit is +# calculated as latency times number of rounds, 10 for 128-bit key, +# and divided by 16, the number of bytes in block, or in other words +# it accounts *solely* for aesenc instructions. But there are extra +# instructions, and numbers so close to the asymptotic limits mean +# that it's as if it takes as little as *one* additional cycle to +# execute all of them. How is it possible? It is possible thanks to +# out-of-order execution logic, which manages to overlap post- +# processing of previous block, things like saving the output, with +# actual encryption of current block, as well as pre-processing of +# current block, things like fetching input and xor-ing it with +# 0-round element of the key schedule, with actual encryption of +# previous block. Keep this in mind... +# +# For parallelizable modes, such as ECB, CBC decrypt, CTR, higher +# performance is achieved by interleaving instructions working on +# independent blocks. In which case asymptotic limit for such modes +# can be obtained by dividing above mentioned numbers by AES +# instructions' interleave factor. Westmere can execute at most 3 +# instructions at a time, meaning that optimal interleave factor is 3, +# and that's where the "magic" number of 1.25 come from. "Optimal +# interleave factor" means that increase of interleave factor does +# not improve performance. The formula has proven to reflect reality +# pretty well on Westmere... Sandy Bridge on the other hand can +# execute up to 8 AES instructions at a time, so how does varying +# interleave factor affect the performance? Here is table for ECB +# (numbers are cycles per byte processed with 128-bit key): +# +# instruction interleave factor 3x 6x 8x +# theoretical asymptotic limit 1.67 0.83 0.625 +# measured performance for 8KB block 1.05 0.86 0.84 +# +# "as if" interleave factor 4.7x 5.8x 6.0x +# +# Further data for other parallelizable modes: +# +# CBC decrypt 1.16 0.93 0.93 +# CTR 1.14 0.91 n/a +# +# Well, given 3x column it's probably inappropriate to call the limit +# asymptotic, if it can be surpassed, isn't it? What happens there? +# Rewind to CBC paragraph for the answer. Yes, out-of-order execution +# magic is responsible for this. Processor overlaps not only the +# additional instructions with AES ones, but even AES instuctions +# processing adjacent triplets of independent blocks. In the 6x case +# additional instructions still claim disproportionally small amount +# of additional cycles, but in 8x case number of instructions must be +# a tad too high for out-of-order logic to cope with, and AES unit +# remains underutilized... As you can see 8x interleave is hardly +# justifiable, so there no need to feel bad that 32-bit aesni-x86.pl +# utilizies 6x interleave because of limited register bank capacity. +# +# Higher interleave factors do have negative impact on Westmere +# performance. While for ECB mode it's negligible ~1.5%, other +# parallelizables perform ~5% worse, which is outweighed by ~25% +# improvement on Sandy Bridge. To balance regression on Westmere +# CTR mode was implemented with 6x aesenc interleave factor. + +# April 2011 +# +# Add aesni_xts_[en|de]crypt. Westmere spends 1.33 cycles processing +# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like +# in CTR mode AES instruction interleave factor was chosen to be 6x. + +$PREFIX="aesni"; # if $PREFIX is set to "AES", the script + # generates drop-in replacement for + # crypto/aes/asm/aes-x86_64.pl:-) + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$movkey = $PREFIX eq "aesni" ? "movups" : "movups"; +@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order + +$code=".text\n"; + +$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! +# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... +$inp="%rdi"; +$out="%rsi"; +$len="%rdx"; +$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! +$ivp="%r8"; # cbc, ctr, ... + +$rnds_="%r10d"; # backup copy for $rounds +$key_="%r11"; # backup copy for $key + +# %xmm register layout +$rndkey0="%xmm0"; $rndkey1="%xmm1"; +$inout0="%xmm2"; $inout1="%xmm3"; +$inout2="%xmm4"; $inout3="%xmm5"; +$inout4="%xmm6"; $inout5="%xmm7"; +$inout6="%xmm8"; $inout7="%xmm9"; + +$in2="%xmm6"; $in1="%xmm7"; # used in CBC decrypt, CTR, ... +$in0="%xmm8"; $iv="%xmm9"; + +# Inline version of internal aesni_[en|de]crypt1. +# +# Why folded loop? Because aes[enc|dec] is slow enough to accommodate +# cycles which take care of loop variables... +{ my $sn; +sub aesni_generate1 { +my ($p,$key,$rounds,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout)); +++$sn; +$code.=<<___; + $movkey ($key),$rndkey0 + $movkey 16($key),$rndkey1 +___ +$code.=<<___ if (defined($ivec)); + xorps $rndkey0,$ivec + lea 32($key),$key + xorps $ivec,$inout +___ +$code.=<<___ if (!defined($ivec)); + lea 32($key),$key + xorps $rndkey0,$inout +___ +$code.=<<___; +.Loop_${p}1_$sn: + aes${p} $rndkey1,$inout + dec $rounds + $movkey ($key),$rndkey1 + lea 16($key),$key + jnz .Loop_${p}1_$sn # loop body is 16 bytes + aes${p}last $rndkey1,$inout +___ +}} +# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); +# +{ my ($inp,$out,$key) = @_4args; + +$code.=<<___; +.globl ${PREFIX}_encrypt +.type ${PREFIX}_encrypt,\@abi-omnipotent +.align 16 +${PREFIX}_encrypt: + movups ($inp),$inout0 # load input + mov 240($key),$rounds # key->rounds +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + movups $inout0,($out) # output + ret +.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt + +.globl ${PREFIX}_decrypt +.type ${PREFIX}_decrypt,\@abi-omnipotent +.align 16 +${PREFIX}_decrypt: + movups ($inp),$inout0 # load input + mov 240($key),$rounds # key->rounds +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + movups $inout0,($out) # output + ret +.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt +___ +} + +# _aesni_[en|de]cryptN are private interfaces, N denotes interleave +# factor. Why 3x subroutine were originally used in loops? Even though +# aes[enc|dec] latency was originally 6, it could be scheduled only +# every *2nd* cycle. Thus 3x interleave was the one providing optimal +# utilization, i.e. when subroutine's throughput is virtually same as +# of non-interleaved subroutine [for number of input blocks up to 3]. +# This is why it makes no sense to implement 2x subroutine. +# aes[enc|dec] latency in next processor generation is 8, but the +# instructions can be scheduled every cycle. Optimal interleave for +# new processor is therefore 8x... +sub aesni_generate3 { +my $dir=shift; +# As already mentioned it takes in $key and $rounds, which are *not* +# preserved. $inout[0-2] is cipher/clear text... +$code.=<<___; +.type _aesni_${dir}rypt3,\@abi-omnipotent +.align 16 +_aesni_${dir}rypt3: + $movkey ($key),$rndkey0 + shr \$1,$rounds + $movkey 16($key),$rndkey1 + lea 32($key),$key + xorps $rndkey0,$inout0 + xorps $rndkey0,$inout1 + xorps $rndkey0,$inout2 + $movkey ($key),$rndkey0 + +.L${dir}_loop3: + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + dec $rounds + aes${dir} $rndkey1,$inout2 + $movkey 16($key),$rndkey1 + aes${dir} $rndkey0,$inout0 + aes${dir} $rndkey0,$inout1 + lea 32($key),$key + aes${dir} $rndkey0,$inout2 + $movkey ($key),$rndkey0 + jnz .L${dir}_loop3 + + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + aes${dir} $rndkey1,$inout2 + aes${dir}last $rndkey0,$inout0 + aes${dir}last $rndkey0,$inout1 + aes${dir}last $rndkey0,$inout2 + ret +.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 +___ +} +# 4x interleave is implemented to improve small block performance, +# most notably [and naturally] 4 block by ~30%. One can argue that one +# should have implemented 5x as well, but improvement would be <20%, +# so it's not worth it... +sub aesni_generate4 { +my $dir=shift; +# As already mentioned it takes in $key and $rounds, which are *not* +# preserved. $inout[0-3] is cipher/clear text... +$code.=<<___; +.type _aesni_${dir}rypt4,\@abi-omnipotent +.align 16 +_aesni_${dir}rypt4: + $movkey ($key),$rndkey0 + shr \$1,$rounds + $movkey 16($key),$rndkey1 + lea 32($key),$key + xorps $rndkey0,$inout0 + xorps $rndkey0,$inout1 + xorps $rndkey0,$inout2 + xorps $rndkey0,$inout3 + $movkey ($key),$rndkey0 + +.L${dir}_loop4: + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + dec $rounds + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + $movkey 16($key),$rndkey1 + aes${dir} $rndkey0,$inout0 + aes${dir} $rndkey0,$inout1 + lea 32($key),$key + aes${dir} $rndkey0,$inout2 + aes${dir} $rndkey0,$inout3 + $movkey ($key),$rndkey0 + jnz .L${dir}_loop4 + + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir}last $rndkey0,$inout0 + aes${dir}last $rndkey0,$inout1 + aes${dir}last $rndkey0,$inout2 + aes${dir}last $rndkey0,$inout3 + ret +.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 +___ +} +sub aesni_generate6 { +my $dir=shift; +# As already mentioned it takes in $key and $rounds, which are *not* +# preserved. $inout[0-5] is cipher/clear text... +$code.=<<___; +.type _aesni_${dir}rypt6,\@abi-omnipotent +.align 16 +_aesni_${dir}rypt6: + $movkey ($key),$rndkey0 + shr \$1,$rounds + $movkey 16($key),$rndkey1 + lea 32($key),$key + xorps $rndkey0,$inout0 + pxor $rndkey0,$inout1 + aes${dir} $rndkey1,$inout0 + pxor $rndkey0,$inout2 + aes${dir} $rndkey1,$inout1 + pxor $rndkey0,$inout3 + aes${dir} $rndkey1,$inout2 + pxor $rndkey0,$inout4 + aes${dir} $rndkey1,$inout3 + pxor $rndkey0,$inout5 + dec $rounds + aes${dir} $rndkey1,$inout4 + $movkey ($key),$rndkey0 + aes${dir} $rndkey1,$inout5 + jmp .L${dir}_loop6_enter +.align 16 +.L${dir}_loop6: + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + dec $rounds + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout5 +.L${dir}_loop6_enter: # happens to be 16-byte aligned + $movkey 16($key),$rndkey1 + aes${dir} $rndkey0,$inout0 + aes${dir} $rndkey0,$inout1 + lea 32($key),$key + aes${dir} $rndkey0,$inout2 + aes${dir} $rndkey0,$inout3 + aes${dir} $rndkey0,$inout4 + aes${dir} $rndkey0,$inout5 + $movkey ($key),$rndkey0 + jnz .L${dir}_loop6 + + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout5 + aes${dir}last $rndkey0,$inout0 + aes${dir}last $rndkey0,$inout1 + aes${dir}last $rndkey0,$inout2 + aes${dir}last $rndkey0,$inout3 + aes${dir}last $rndkey0,$inout4 + aes${dir}last $rndkey0,$inout5 + ret +.size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6 +___ +} +sub aesni_generate8 { +my $dir=shift; +# As already mentioned it takes in $key and $rounds, which are *not* +# preserved. $inout[0-7] is cipher/clear text... +$code.=<<___; +.type _aesni_${dir}rypt8,\@abi-omnipotent +.align 16 +_aesni_${dir}rypt8: + $movkey ($key),$rndkey0 + shr \$1,$rounds + $movkey 16($key),$rndkey1 + lea 32($key),$key + xorps $rndkey0,$inout0 + xorps $rndkey0,$inout1 + aes${dir} $rndkey1,$inout0 + pxor $rndkey0,$inout2 + aes${dir} $rndkey1,$inout1 + pxor $rndkey0,$inout3 + aes${dir} $rndkey1,$inout2 + pxor $rndkey0,$inout4 + aes${dir} $rndkey1,$inout3 + pxor $rndkey0,$inout5 + dec $rounds + aes${dir} $rndkey1,$inout4 + pxor $rndkey0,$inout6 + aes${dir} $rndkey1,$inout5 + pxor $rndkey0,$inout7 + $movkey ($key),$rndkey0 + aes${dir} $rndkey1,$inout6 + aes${dir} $rndkey1,$inout7 + $movkey 16($key),$rndkey1 + jmp .L${dir}_loop8_enter +.align 16 +.L${dir}_loop8: + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + dec $rounds + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout5 + aes${dir} $rndkey1,$inout6 + aes${dir} $rndkey1,$inout7 + $movkey 16($key),$rndkey1 +.L${dir}_loop8_enter: # happens to be 16-byte aligned + aes${dir} $rndkey0,$inout0 + aes${dir} $rndkey0,$inout1 + lea 32($key),$key + aes${dir} $rndkey0,$inout2 + aes${dir} $rndkey0,$inout3 + aes${dir} $rndkey0,$inout4 + aes${dir} $rndkey0,$inout5 + aes${dir} $rndkey0,$inout6 + aes${dir} $rndkey0,$inout7 + $movkey ($key),$rndkey0 + jnz .L${dir}_loop8 + + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout5 + aes${dir} $rndkey1,$inout6 + aes${dir} $rndkey1,$inout7 + aes${dir}last $rndkey0,$inout0 + aes${dir}last $rndkey0,$inout1 + aes${dir}last $rndkey0,$inout2 + aes${dir}last $rndkey0,$inout3 + aes${dir}last $rndkey0,$inout4 + aes${dir}last $rndkey0,$inout5 + aes${dir}last $rndkey0,$inout6 + aes${dir}last $rndkey0,$inout7 + ret +.size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 +___ +} +&aesni_generate3("enc") if ($PREFIX eq "aesni"); +&aesni_generate3("dec"); +&aesni_generate4("enc") if ($PREFIX eq "aesni"); +&aesni_generate4("dec"); +&aesni_generate6("enc") if ($PREFIX eq "aesni"); +&aesni_generate6("dec"); +&aesni_generate8("enc") if ($PREFIX eq "aesni"); +&aesni_generate8("dec"); + +if ($PREFIX eq "aesni") { +######################################################################## +# void aesni_ecb_encrypt (const void *in, void *out, +# size_t length, const AES_KEY *key, +# int enc); +$code.=<<___; +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,\@function,5 +.align 16 +aesni_ecb_encrypt: + and \$-16,$len + jz .Lecb_ret + + mov 240($key),$rounds # key->rounds + $movkey ($key),$rndkey0 + mov $key,$key_ # backup $key + mov $rounds,$rnds_ # backup $rounds + test %r8d,%r8d # 5th argument + jz .Lecb_decrypt +#--------------------------- ECB ENCRYPT ------------------------------# + cmp \$0x80,$len + jb .Lecb_enc_tail + + movdqu ($inp),$inout0 + movdqu 0x10($inp),$inout1 + movdqu 0x20($inp),$inout2 + movdqu 0x30($inp),$inout3 + movdqu 0x40($inp),$inout4 + movdqu 0x50($inp),$inout5 + movdqu 0x60($inp),$inout6 + movdqu 0x70($inp),$inout7 + lea 0x80($inp),$inp + sub \$0x80,$len + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups $inout0,($out) + mov $key_,$key # restore $key + movdqu ($inp),$inout0 + mov $rnds_,$rounds # restore $rounds + movups $inout1,0x10($out) + movdqu 0x10($inp),$inout1 + movups $inout2,0x20($out) + movdqu 0x20($inp),$inout2 + movups $inout3,0x30($out) + movdqu 0x30($inp),$inout3 + movups $inout4,0x40($out) + movdqu 0x40($inp),$inout4 + movups $inout5,0x50($out) + movdqu 0x50($inp),$inout5 + movups $inout6,0x60($out) + movdqu 0x60($inp),$inout6 + movups $inout7,0x70($out) + lea 0x80($out),$out + movdqu 0x70($inp),$inout7 + lea 0x80($inp),$inp +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + sub \$0x80,$len + jnc .Lecb_enc_loop8 + + movups $inout0,($out) + mov $key_,$key # restore $key + movups $inout1,0x10($out) + mov $rnds_,$rounds # restore $rounds + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + movups $inout6,0x60($out) + movups $inout7,0x70($out) + lea 0x80($out),$out + add \$0x80,$len + jz .Lecb_ret + +.Lecb_enc_tail: + movups ($inp),$inout0 + cmp \$0x20,$len + jb .Lecb_enc_one + movups 0x10($inp),$inout1 + je .Lecb_enc_two + movups 0x20($inp),$inout2 + cmp \$0x40,$len + jb .Lecb_enc_three + movups 0x30($inp),$inout3 + je .Lecb_enc_four + movups 0x40($inp),$inout4 + cmp \$0x60,$len + jb .Lecb_enc_five + movups 0x50($inp),$inout5 + je .Lecb_enc_six + movdqu 0x60($inp),$inout6 + call _aesni_encrypt8 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + movups $inout6,0x60($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + movups $inout0,($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + xorps $inout2,$inout2 + call _aesni_encrypt3 + movups $inout0,($out) + movups $inout1,0x10($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps $inout5,$inout5 + call _aesni_encrypt6 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + jmp .Lecb_ret + #--------------------------- ECB DECRYPT ------------------------------# +.align 16 +.Lecb_decrypt: + cmp \$0x80,$len + jb .Lecb_dec_tail + + movdqu ($inp),$inout0 + movdqu 0x10($inp),$inout1 + movdqu 0x20($inp),$inout2 + movdqu 0x30($inp),$inout3 + movdqu 0x40($inp),$inout4 + movdqu 0x50($inp),$inout5 + movdqu 0x60($inp),$inout6 + movdqu 0x70($inp),$inout7 + lea 0x80($inp),$inp + sub \$0x80,$len + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups $inout0,($out) + mov $key_,$key # restore $key + movdqu ($inp),$inout0 + mov $rnds_,$rounds # restore $rounds + movups $inout1,0x10($out) + movdqu 0x10($inp),$inout1 + movups $inout2,0x20($out) + movdqu 0x20($inp),$inout2 + movups $inout3,0x30($out) + movdqu 0x30($inp),$inout3 + movups $inout4,0x40($out) + movdqu 0x40($inp),$inout4 + movups $inout5,0x50($out) + movdqu 0x50($inp),$inout5 + movups $inout6,0x60($out) + movdqu 0x60($inp),$inout6 + movups $inout7,0x70($out) + lea 0x80($out),$out + movdqu 0x70($inp),$inout7 + lea 0x80($inp),$inp +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + $movkey ($key_),$rndkey0 + sub \$0x80,$len + jnc .Lecb_dec_loop8 + + movups $inout0,($out) + mov $key_,$key # restore $key + movups $inout1,0x10($out) + mov $rnds_,$rounds # restore $rounds + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + movups $inout6,0x60($out) + movups $inout7,0x70($out) + lea 0x80($out),$out + add \$0x80,$len + jz .Lecb_ret + +.Lecb_dec_tail: + movups ($inp),$inout0 + cmp \$0x20,$len + jb .Lecb_dec_one + movups 0x10($inp),$inout1 + je .Lecb_dec_two + movups 0x20($inp),$inout2 + cmp \$0x40,$len + jb .Lecb_dec_three + movups 0x30($inp),$inout3 + je .Lecb_dec_four + movups 0x40($inp),$inout4 + cmp \$0x60,$len + jb .Lecb_dec_five + movups 0x50($inp),$inout5 + je .Lecb_dec_six + movups 0x60($inp),$inout6 + $movkey ($key),$rndkey0 + call _aesni_decrypt8 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + movups $inout6,0x60($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + movups $inout0,($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + xorps $inout2,$inout2 + call _aesni_decrypt3 + movups $inout0,($out) + movups $inout1,0x10($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps $inout5,$inout5 + call _aesni_decrypt6 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + +.Lecb_ret: + ret +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +___ + +{ +###################################################################### +# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, +# size_t blocks, const AES_KEY *key, +# const char *ivec,char *cmac); +# +# Handles only complete blocks, operates on 64-bit counter and +# does not update *ivec! Nor does it finalize CMAC value +# (see engine/eng_aesni.c for details) +# +{ +my $cmac="%r9"; # 6th argument + +my $increment="%xmm6"; +my $bswap_mask="%xmm7"; + +$code.=<<___; +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,\@function,6 +.align 16 +aesni_ccm64_encrypt_blocks: +___ +$code.=<<___ if ($win64); + lea -0x58(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) +.Lccm64_enc_body: +___ +$code.=<<___; + mov 240($key),$rounds # key->rounds + movdqu ($ivp),$iv + movdqa .Lincrement64(%rip),$increment + movdqa .Lbswap_mask(%rip),$bswap_mask + + shr \$1,$rounds + lea 0($key),$key_ + movdqu ($cmac),$inout1 + movdqa $iv,$inout0 + mov $rounds,$rnds_ + pshufb $bswap_mask,$iv + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + $movkey ($key_),$rndkey0 + mov $rnds_,$rounds + movups ($inp),$in0 # load inp + + xorps $rndkey0,$inout0 # counter + $movkey 16($key_),$rndkey1 + xorps $in0,$rndkey0 + lea 32($key_),$key + xorps $rndkey0,$inout1 # cmac^=inp + $movkey ($key),$rndkey0 + +.Lccm64_enc2_loop: + aesenc $rndkey1,$inout0 + dec $rounds + aesenc $rndkey1,$inout1 + $movkey 16($key),$rndkey1 + aesenc $rndkey0,$inout0 + lea 32($key),$key + aesenc $rndkey0,$inout1 + $movkey 0($key),$rndkey0 + jnz .Lccm64_enc2_loop + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + paddq $increment,$iv + aesenclast $rndkey0,$inout0 + aesenclast $rndkey0,$inout1 + + dec $len + lea 16($inp),$inp + xorps $inout0,$in0 # inp ^= E(iv) + movdqa $iv,$inout0 + movups $in0,($out) # save output + lea 16($out),$out + pshufb $bswap_mask,$inout0 + jnz .Lccm64_enc_outer + + movups $inout1,($cmac) +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + lea 0x58(%rsp),%rsp +.Lccm64_enc_ret: +___ +$code.=<<___; + ret +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +___ +###################################################################### +$code.=<<___; +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,\@function,6 +.align 16 +aesni_ccm64_decrypt_blocks: +___ +$code.=<<___ if ($win64); + lea -0x58(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) +.Lccm64_dec_body: +___ +$code.=<<___; + mov 240($key),$rounds # key->rounds + movups ($ivp),$iv + movdqu ($cmac),$inout1 + movdqa .Lincrement64(%rip),$increment + movdqa .Lbswap_mask(%rip),$bswap_mask + + movaps $iv,$inout0 + mov $rounds,$rnds_ + mov $key,$key_ + pshufb $bswap_mask,$iv +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + movups ($inp),$in0 # load inp + paddq $increment,$iv + lea 16($inp),$inp + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps $inout0,$in0 # inp ^= E(iv) + movdqa $iv,$inout0 + mov $rnds_,$rounds + movups $in0,($out) # save output + lea 16($out),$out + pshufb $bswap_mask,$inout0 + + sub \$1,$len + jz .Lccm64_dec_break + + $movkey ($key_),$rndkey0 + shr \$1,$rounds + $movkey 16($key_),$rndkey1 + xorps $rndkey0,$in0 + lea 32($key_),$key + xorps $rndkey0,$inout0 + xorps $in0,$inout1 # cmac^=out + $movkey ($key),$rndkey0 + +.Lccm64_dec2_loop: + aesenc $rndkey1,$inout0 + dec $rounds + aesenc $rndkey1,$inout1 + $movkey 16($key),$rndkey1 + aesenc $rndkey0,$inout0 + lea 32($key),$key + aesenc $rndkey0,$inout1 + $movkey 0($key),$rndkey0 + jnz .Lccm64_dec2_loop + movups ($inp),$in0 # load inp + paddq $increment,$iv + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + lea 16($inp),$inp + aesenclast $rndkey0,$inout0 + aesenclast $rndkey0,$inout1 + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + #xorps $in0,$inout1 # cmac^=out +___ + &aesni_generate1("enc",$key_,$rounds,$inout1,$in0); +$code.=<<___; + movups $inout1,($cmac) +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + lea 0x58(%rsp),%rsp +.Lccm64_dec_ret: +___ +$code.=<<___; + ret +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +___ +} +###################################################################### +# void aesni_ctr32_encrypt_blocks (const void *in, void *out, +# size_t blocks, const AES_KEY *key, +# const char *ivec); +# +# Handles only complete blocks, operates on 32-bit counter and +# does not update *ivec! (see engine/eng_aesni.c for details) +# +{ +my $reserved = $win64?0:-0x28; +my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); +my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); +my $bswap_mask="%xmm15"; + +$code.=<<___; +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,\@function,5 +.align 16 +aesni_ctr32_encrypt_blocks: +___ +$code.=<<___ if ($win64); + lea -0xc8(%rsp),%rsp + movaps %xmm6,0x20(%rsp) + movaps %xmm7,0x30(%rsp) + movaps %xmm8,0x40(%rsp) + movaps %xmm9,0x50(%rsp) + movaps %xmm10,0x60(%rsp) + movaps %xmm11,0x70(%rsp) + movaps %xmm12,0x80(%rsp) + movaps %xmm13,0x90(%rsp) + movaps %xmm14,0xa0(%rsp) + movaps %xmm15,0xb0(%rsp) +.Lctr32_body: +___ +$code.=<<___; + cmp \$1,$len + je .Lctr32_one_shortcut + + movdqu ($ivp),$ivec + movdqa .Lbswap_mask(%rip),$bswap_mask + xor $rounds,$rounds + pextrd \$3,$ivec,$rnds_ # pull 32-bit counter + pinsrd \$3,$rounds,$ivec # wipe 32-bit counter + + mov 240($key),$rounds # key->rounds + bswap $rnds_ + pxor $iv0,$iv0 # vector of 3 32-bit counters + pxor $iv1,$iv1 # vector of 3 32-bit counters + pinsrd \$0,$rnds_,$iv0 + lea 3($rnds_),$key_ + pinsrd \$0,$key_,$iv1 + inc $rnds_ + pinsrd \$1,$rnds_,$iv0 + inc $key_ + pinsrd \$1,$key_,$iv1 + inc $rnds_ + pinsrd \$2,$rnds_,$iv0 + inc $key_ + pinsrd \$2,$key_,$iv1 + movdqa $iv0,$reserved(%rsp) + pshufb $bswap_mask,$iv0 + movdqa $iv1,`$reserved+0x10`(%rsp) + pshufb $bswap_mask,$iv1 + + pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword + pshufd \$`2<<6`,$iv0,$inout1 + pshufd \$`1<<6`,$iv0,$inout2 + cmp \$6,$len + jb .Lctr32_tail + shr \$1,$rounds + mov $key,$key_ # backup $key + mov $rounds,$rnds_ # backup $rounds + sub \$6,$len + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + pshufd \$`3<<6`,$iv1,$inout3 + por $ivec,$inout0 # merge counter-less ivec + $movkey ($key_),$rndkey0 + pshufd \$`2<<6`,$iv1,$inout4 + por $ivec,$inout1 + $movkey 16($key_),$rndkey1 + pshufd \$`1<<6`,$iv1,$inout5 + por $ivec,$inout2 + por $ivec,$inout3 + xorps $rndkey0,$inout0 + por $ivec,$inout4 + por $ivec,$inout5 + + # inline _aesni_encrypt6 and interleave last rounds + # with own code... + + pxor $rndkey0,$inout1 + aesenc $rndkey1,$inout0 + lea 32($key_),$key + pxor $rndkey0,$inout2 + aesenc $rndkey1,$inout1 + movdqa .Lincrement32(%rip),$iv1 + pxor $rndkey0,$inout3 + aesenc $rndkey1,$inout2 + movdqa $reserved(%rsp),$iv0 + pxor $rndkey0,$inout4 + aesenc $rndkey1,$inout3 + pxor $rndkey0,$inout5 + $movkey ($key),$rndkey0 + dec $rounds + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 + jmp .Lctr32_enc_loop6_enter +.align 16 +.Lctr32_enc_loop6: + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + dec $rounds + aesenc $rndkey1,$inout2 + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 +.Lctr32_enc_loop6_enter: + $movkey 16($key),$rndkey1 + aesenc $rndkey0,$inout0 + aesenc $rndkey0,$inout1 + lea 32($key),$key + aesenc $rndkey0,$inout2 + aesenc $rndkey0,$inout3 + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + $movkey ($key),$rndkey0 + jnz .Lctr32_enc_loop6 + + aesenc $rndkey1,$inout0 + paddd $iv1,$iv0 # increment counter vector + aesenc $rndkey1,$inout1 + paddd `$reserved+0x10`(%rsp),$iv1 + aesenc $rndkey1,$inout2 + movdqa $iv0,$reserved(%rsp) # save counter vector + aesenc $rndkey1,$inout3 + movdqa $iv1,`$reserved+0x10`(%rsp) + aesenc $rndkey1,$inout4 + pshufb $bswap_mask,$iv0 # byte swap + aesenc $rndkey1,$inout5 + pshufb $bswap_mask,$iv1 + + aesenclast $rndkey0,$inout0 + movups ($inp),$in0 # load input + aesenclast $rndkey0,$inout1 + movups 0x10($inp),$in1 + aesenclast $rndkey0,$inout2 + movups 0x20($inp),$in2 + aesenclast $rndkey0,$inout3 + movups 0x30($inp),$in3 + aesenclast $rndkey0,$inout4 + movups 0x40($inp),$rndkey1 + aesenclast $rndkey0,$inout5 + movups 0x50($inp),$rndkey0 + lea 0x60($inp),$inp + + xorps $inout0,$in0 # xor + pshufd \$`3<<6`,$iv0,$inout0 + xorps $inout1,$in1 + pshufd \$`2<<6`,$iv0,$inout1 + movups $in0,($out) # store output + xorps $inout2,$in2 + pshufd \$`1<<6`,$iv0,$inout2 + movups $in1,0x10($out) + xorps $inout3,$in3 + movups $in2,0x20($out) + xorps $inout4,$rndkey1 + movups $in3,0x30($out) + xorps $inout5,$rndkey0 + movups $rndkey1,0x40($out) + movups $rndkey0,0x50($out) + lea 0x60($out),$out + mov $rnds_,$rounds + sub \$6,$len + jnc .Lctr32_loop6 + + add \$6,$len + jz .Lctr32_done + mov $key_,$key # restore $key + lea 1($rounds,$rounds),$rounds # restore original value + +.Lctr32_tail: + por $ivec,$inout0 + movups ($inp),$in0 + cmp \$2,$len + jb .Lctr32_one + + por $ivec,$inout1 + movups 0x10($inp),$in1 + je .Lctr32_two + + pshufd \$`3<<6`,$iv1,$inout3 + por $ivec,$inout2 + movups 0x20($inp),$in2 + cmp \$4,$len + jb .Lctr32_three + + pshufd \$`2<<6`,$iv1,$inout4 + por $ivec,$inout3 + movups 0x30($inp),$in3 + je .Lctr32_four + + por $ivec,$inout4 + xorps $inout5,$inout5 + + call _aesni_encrypt6 + + movups 0x40($inp),$rndkey1 + xorps $inout0,$in0 + xorps $inout1,$in1 + movups $in0,($out) + xorps $inout2,$in2 + movups $in1,0x10($out) + xorps $inout3,$in3 + movups $in2,0x20($out) + xorps $inout4,$rndkey1 + movups $in3,0x30($out) + movups $rndkey1,0x40($out) + jmp .Lctr32_done + +.align 16 +.Lctr32_one_shortcut: + movups ($ivp),$inout0 + movups ($inp),$in0 + mov 240($key),$rounds # key->rounds +.Lctr32_one: +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + xorps $inout0,$in0 + movups $in0,($out) + jmp .Lctr32_done + +.align 16 +.Lctr32_two: + xorps $inout2,$inout2 + call _aesni_encrypt3 + xorps $inout0,$in0 + xorps $inout1,$in1 + movups $in0,($out) + movups $in1,0x10($out) + jmp .Lctr32_done + +.align 16 +.Lctr32_three: + call _aesni_encrypt3 + xorps $inout0,$in0 + xorps $inout1,$in1 + movups $in0,($out) + xorps $inout2,$in2 + movups $in1,0x10($out) + movups $in2,0x20($out) + jmp .Lctr32_done + +.align 16 +.Lctr32_four: + call _aesni_encrypt4 + xorps $inout0,$in0 + xorps $inout1,$in1 + movups $in0,($out) + xorps $inout2,$in2 + movups $in1,0x10($out) + xorps $inout3,$in3 + movups $in2,0x20($out) + movups $in3,0x30($out) + +.Lctr32_done: +___ +$code.=<<___ if ($win64); + movaps 0x20(%rsp),%xmm6 + movaps 0x30(%rsp),%xmm7 + movaps 0x40(%rsp),%xmm8 + movaps 0x50(%rsp),%xmm9 + movaps 0x60(%rsp),%xmm10 + movaps 0x70(%rsp),%xmm11 + movaps 0x80(%rsp),%xmm12 + movaps 0x90(%rsp),%xmm13 + movaps 0xa0(%rsp),%xmm14 + movaps 0xb0(%rsp),%xmm15 + lea 0xc8(%rsp),%rsp +.Lctr32_ret: +___ +$code.=<<___; + ret +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +___ +} + +###################################################################### +# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2 +# const unsigned char iv[16]); +# +{ +my @tweak=map("%xmm$_",(10..15)); +my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); +my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); +my $frame_size = 0x68 + ($win64?160:0); + +$code.=<<___; +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,\@function,6 +.align 16 +aesni_xts_encrypt: + lea -$frame_size(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,0x60(%rsp) + movaps %xmm7,0x70(%rsp) + movaps %xmm8,0x80(%rsp) + movaps %xmm9,0x90(%rsp) + movaps %xmm10,0xa0(%rsp) + movaps %xmm11,0xb0(%rsp) + movaps %xmm12,0xc0(%rsp) + movaps %xmm13,0xd0(%rsp) + movaps %xmm14,0xe0(%rsp) + movaps %xmm15,0xf0(%rsp) +.Lxts_enc_body: +___ +$code.=<<___; + movups ($ivp),@tweak[5] # load clear-text tweak + mov 240(%r8),$rounds # key2->rounds + mov 240($key),$rnds_ # key1->rounds +___ + # generate the tweak + &aesni_generate1("enc",$key2,$rounds,@tweak[5]); +$code.=<<___; + mov $key,$key_ # backup $key + mov $rnds_,$rounds # backup $rounds + mov $len,$len_ # backup $len + and \$-16,$len + + movdqa .Lxts_magic(%rip),$twmask + pxor $twtmp,$twtmp + pcmpgtd @tweak[5],$twtmp # broadcast upper bits +___ + for ($i=0;$i<4;$i++) { + $code.=<<___; + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[$i] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + pand $twmask,$twres # isolate carry and residue + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + pxor $twres,@tweak[5] +___ + } +$code.=<<___; + sub \$16*6,$len + jc .Lxts_enc_short + + shr \$1,$rounds + sub \$1,$rounds + mov $rounds,$rnds_ + jmp .Lxts_enc_grandloop + +.align 16 +.Lxts_enc_grandloop: + pshufd \$0x13,$twtmp,$twres + movdqa @tweak[5],@tweak[4] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqu `16*0`($inp),$inout0 # load input + pand $twmask,$twres # isolate carry and residue + movdqu `16*1`($inp),$inout1 + pxor $twres,@tweak[5] + + movdqu `16*2`($inp),$inout2 + pxor @tweak[0],$inout0 # input^=tweak + movdqu `16*3`($inp),$inout3 + pxor @tweak[1],$inout1 + movdqu `16*4`($inp),$inout4 + pxor @tweak[2],$inout2 + movdqu `16*5`($inp),$inout5 + lea `16*6`($inp),$inp + pxor @tweak[3],$inout3 + $movkey ($key_),$rndkey0 + pxor @tweak[4],$inout4 + pxor @tweak[5],$inout5 + + # inline _aesni_encrypt6 and interleave first and last rounds + # with own code... + $movkey 16($key_),$rndkey1 + pxor $rndkey0,$inout0 + pxor $rndkey0,$inout1 + movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks + aesenc $rndkey1,$inout0 + lea 32($key_),$key + pxor $rndkey0,$inout2 + movdqa @tweak[1],`16*1`(%rsp) + aesenc $rndkey1,$inout1 + pxor $rndkey0,$inout3 + movdqa @tweak[2],`16*2`(%rsp) + aesenc $rndkey1,$inout2 + pxor $rndkey0,$inout4 + movdqa @tweak[3],`16*3`(%rsp) + aesenc $rndkey1,$inout3 + pxor $rndkey0,$inout5 + $movkey ($key),$rndkey0 + dec $rounds + movdqa @tweak[4],`16*4`(%rsp) + aesenc $rndkey1,$inout4 + movdqa @tweak[5],`16*5`(%rsp) + aesenc $rndkey1,$inout5 + pxor $twtmp,$twtmp + pcmpgtd @tweak[5],$twtmp + jmp .Lxts_enc_loop6_enter + +.align 16 +.Lxts_enc_loop6: + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + dec $rounds + aesenc $rndkey1,$inout2 + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 +.Lxts_enc_loop6_enter: + $movkey 16($key),$rndkey1 + aesenc $rndkey0,$inout0 + aesenc $rndkey0,$inout1 + lea 32($key),$key + aesenc $rndkey0,$inout2 + aesenc $rndkey0,$inout3 + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + $movkey ($key),$rndkey0 + jnz .Lxts_enc_loop6 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesenc $rndkey1,$inout0 + pand $twmask,$twres # isolate carry and residue + aesenc $rndkey1,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcast upper bits + aesenc $rndkey1,$inout2 + pxor $twres,@tweak[5] + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 + $movkey 16($key),$rndkey1 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[0] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesenc $rndkey0,$inout0 + pand $twmask,$twres # isolate carry and residue + aesenc $rndkey0,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesenc $rndkey0,$inout2 + pxor $twres,@tweak[5] + aesenc $rndkey0,$inout3 + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + $movkey 32($key),$rndkey0 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[1] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesenc $rndkey1,$inout0 + pand $twmask,$twres # isolate carry and residue + aesenc $rndkey1,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesenc $rndkey1,$inout2 + pxor $twres,@tweak[5] + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[2] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesenclast $rndkey0,$inout0 + pand $twmask,$twres # isolate carry and residue + aesenclast $rndkey0,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesenclast $rndkey0,$inout2 + pxor $twres,@tweak[5] + aesenclast $rndkey0,$inout3 + aesenclast $rndkey0,$inout4 + aesenclast $rndkey0,$inout5 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[3] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + xorps `16*0`(%rsp),$inout0 # output^=tweak + pand $twmask,$twres # isolate carry and residue + xorps `16*1`(%rsp),$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + pxor $twres,@tweak[5] + + xorps `16*2`(%rsp),$inout2 + movups $inout0,`16*0`($out) # write output + xorps `16*3`(%rsp),$inout3 + movups $inout1,`16*1`($out) + xorps `16*4`(%rsp),$inout4 + movups $inout2,`16*2`($out) + xorps `16*5`(%rsp),$inout5 + movups $inout3,`16*3`($out) + mov $rnds_,$rounds # restore $rounds + movups $inout4,`16*4`($out) + movups $inout5,`16*5`($out) + lea `16*6`($out),$out + sub \$16*6,$len + jnc .Lxts_enc_grandloop + + lea 3($rounds,$rounds),$rounds # restore original value + mov $key_,$key # restore $key + mov $rounds,$rnds_ # backup $rounds + +.Lxts_enc_short: + add \$16*6,$len + jz .Lxts_enc_done + + cmp \$0x20,$len + jb .Lxts_enc_one + je .Lxts_enc_two + + cmp \$0x40,$len + jb .Lxts_enc_three + je .Lxts_enc_four + + pshufd \$0x13,$twtmp,$twres + movdqa @tweak[5],@tweak[4] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqu ($inp),$inout0 + pand $twmask,$twres # isolate carry and residue + movdqu 16*1($inp),$inout1 + pxor $twres,@tweak[5] + + movdqu 16*2($inp),$inout2 + pxor @tweak[0],$inout0 + movdqu 16*3($inp),$inout3 + pxor @tweak[1],$inout1 + movdqu 16*4($inp),$inout4 + lea 16*5($inp),$inp + pxor @tweak[2],$inout2 + pxor @tweak[3],$inout3 + pxor @tweak[4],$inout4 + + call _aesni_encrypt6 + + xorps @tweak[0],$inout0 + movdqa @tweak[5],@tweak[0] + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + movdqu $inout0,($out) + xorps @tweak[3],$inout3 + movdqu $inout1,16*1($out) + xorps @tweak[4],$inout4 + movdqu $inout2,16*2($out) + movdqu $inout3,16*3($out) + movdqu $inout4,16*4($out) + lea 16*5($out),$out + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups ($inp),$inout0 + lea 16*1($inp),$inp + xorps @tweak[0],$inout0 +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + xorps @tweak[0],$inout0 + movdqa @tweak[1],@tweak[0] + movups $inout0,($out) + lea 16*1($out),$out + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups ($inp),$inout0 + movups 16($inp),$inout1 + lea 32($inp),$inp + xorps @tweak[0],$inout0 + xorps @tweak[1],$inout1 + + call _aesni_encrypt3 + + xorps @tweak[0],$inout0 + movdqa @tweak[2],@tweak[0] + xorps @tweak[1],$inout1 + movups $inout0,($out) + movups $inout1,16*1($out) + lea 16*2($out),$out + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups ($inp),$inout0 + movups 16*1($inp),$inout1 + movups 16*2($inp),$inout2 + lea 16*3($inp),$inp + xorps @tweak[0],$inout0 + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + + call _aesni_encrypt3 + + xorps @tweak[0],$inout0 + movdqa @tweak[3],@tweak[0] + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + movups $inout0,($out) + movups $inout1,16*1($out) + movups $inout2,16*2($out) + lea 16*3($out),$out + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups ($inp),$inout0 + movups 16*1($inp),$inout1 + movups 16*2($inp),$inout2 + xorps @tweak[0],$inout0 + movups 16*3($inp),$inout3 + lea 16*4($inp),$inp + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + xorps @tweak[3],$inout3 + + call _aesni_encrypt4 + + xorps @tweak[0],$inout0 + movdqa @tweak[5],@tweak[0] + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + movups $inout0,($out) + xorps @tweak[3],$inout3 + movups $inout1,16*1($out) + movups $inout2,16*2($out) + movups $inout3,16*3($out) + lea 16*4($out),$out + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + and \$15,$len_ + jz .Lxts_enc_ret + mov $len_,$len + +.Lxts_enc_steal: + movzb ($inp),%eax # borrow $rounds ... + movzb -16($out),%ecx # ... and $key + lea 1($inp),$inp + mov %al,-16($out) + mov %cl,0($out) + lea 1($out),$out + sub \$1,$len + jnz .Lxts_enc_steal + + sub $len_,$out # rewind $out + mov $key_,$key # restore $key + mov $rnds_,$rounds # restore $rounds + + movups -16($out),$inout0 + xorps @tweak[0],$inout0 +___ + &aesni_generate1("enc",$key,$rounds); +$code.=<<___; + xorps @tweak[0],$inout0 + movups $inout0,-16($out) + +.Lxts_enc_ret: +___ +$code.=<<___ if ($win64); + movaps 0x60(%rsp),%xmm6 + movaps 0x70(%rsp),%xmm7 + movaps 0x80(%rsp),%xmm8 + movaps 0x90(%rsp),%xmm9 + movaps 0xa0(%rsp),%xmm10 + movaps 0xb0(%rsp),%xmm11 + movaps 0xc0(%rsp),%xmm12 + movaps 0xd0(%rsp),%xmm13 + movaps 0xe0(%rsp),%xmm14 + movaps 0xf0(%rsp),%xmm15 +___ +$code.=<<___; + lea $frame_size(%rsp),%rsp +.Lxts_enc_epilogue: + ret +.size aesni_xts_encrypt,.-aesni_xts_encrypt +___ + +$code.=<<___; +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,\@function,6 +.align 16 +aesni_xts_decrypt: + lea -$frame_size(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,0x60(%rsp) + movaps %xmm7,0x70(%rsp) + movaps %xmm8,0x80(%rsp) + movaps %xmm9,0x90(%rsp) + movaps %xmm10,0xa0(%rsp) + movaps %xmm11,0xb0(%rsp) + movaps %xmm12,0xc0(%rsp) + movaps %xmm13,0xd0(%rsp) + movaps %xmm14,0xe0(%rsp) + movaps %xmm15,0xf0(%rsp) +.Lxts_dec_body: +___ +$code.=<<___; + movups ($ivp),@tweak[5] # load clear-text tweak + mov 240($key2),$rounds # key2->rounds + mov 240($key),$rnds_ # key1->rounds +___ + # generate the tweak + &aesni_generate1("enc",$key2,$rounds,@tweak[5]); +$code.=<<___; + xor %eax,%eax # if ($len%16) len-=16; + test \$15,$len + setnz %al + shl \$4,%rax + sub %rax,$len + + mov $key,$key_ # backup $key + mov $rnds_,$rounds # backup $rounds + mov $len,$len_ # backup $len + and \$-16,$len + + movdqa .Lxts_magic(%rip),$twmask + pxor $twtmp,$twtmp + pcmpgtd @tweak[5],$twtmp # broadcast upper bits +___ + for ($i=0;$i<4;$i++) { + $code.=<<___; + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[$i] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + pand $twmask,$twres # isolate carry and residue + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + pxor $twres,@tweak[5] +___ + } +$code.=<<___; + sub \$16*6,$len + jc .Lxts_dec_short + + shr \$1,$rounds + sub \$1,$rounds + mov $rounds,$rnds_ + jmp .Lxts_dec_grandloop + +.align 16 +.Lxts_dec_grandloop: + pshufd \$0x13,$twtmp,$twres + movdqa @tweak[5],@tweak[4] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqu `16*0`($inp),$inout0 # load input + pand $twmask,$twres # isolate carry and residue + movdqu `16*1`($inp),$inout1 + pxor $twres,@tweak[5] + + movdqu `16*2`($inp),$inout2 + pxor @tweak[0],$inout0 # input^=tweak + movdqu `16*3`($inp),$inout3 + pxor @tweak[1],$inout1 + movdqu `16*4`($inp),$inout4 + pxor @tweak[2],$inout2 + movdqu `16*5`($inp),$inout5 + lea `16*6`($inp),$inp + pxor @tweak[3],$inout3 + $movkey ($key_),$rndkey0 + pxor @tweak[4],$inout4 + pxor @tweak[5],$inout5 + + # inline _aesni_decrypt6 and interleave first and last rounds + # with own code... + $movkey 16($key_),$rndkey1 + pxor $rndkey0,$inout0 + pxor $rndkey0,$inout1 + movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks + aesdec $rndkey1,$inout0 + lea 32($key_),$key + pxor $rndkey0,$inout2 + movdqa @tweak[1],`16*1`(%rsp) + aesdec $rndkey1,$inout1 + pxor $rndkey0,$inout3 + movdqa @tweak[2],`16*2`(%rsp) + aesdec $rndkey1,$inout2 + pxor $rndkey0,$inout4 + movdqa @tweak[3],`16*3`(%rsp) + aesdec $rndkey1,$inout3 + pxor $rndkey0,$inout5 + $movkey ($key),$rndkey0 + dec $rounds + movdqa @tweak[4],`16*4`(%rsp) + aesdec $rndkey1,$inout4 + movdqa @tweak[5],`16*5`(%rsp) + aesdec $rndkey1,$inout5 + pxor $twtmp,$twtmp + pcmpgtd @tweak[5],$twtmp + jmp .Lxts_dec_loop6_enter + +.align 16 +.Lxts_dec_loop6: + aesdec $rndkey1,$inout0 + aesdec $rndkey1,$inout1 + dec $rounds + aesdec $rndkey1,$inout2 + aesdec $rndkey1,$inout3 + aesdec $rndkey1,$inout4 + aesdec $rndkey1,$inout5 +.Lxts_dec_loop6_enter: + $movkey 16($key),$rndkey1 + aesdec $rndkey0,$inout0 + aesdec $rndkey0,$inout1 + lea 32($key),$key + aesdec $rndkey0,$inout2 + aesdec $rndkey0,$inout3 + aesdec $rndkey0,$inout4 + aesdec $rndkey0,$inout5 + $movkey ($key),$rndkey0 + jnz .Lxts_dec_loop6 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesdec $rndkey1,$inout0 + pand $twmask,$twres # isolate carry and residue + aesdec $rndkey1,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcast upper bits + aesdec $rndkey1,$inout2 + pxor $twres,@tweak[5] + aesdec $rndkey1,$inout3 + aesdec $rndkey1,$inout4 + aesdec $rndkey1,$inout5 + $movkey 16($key),$rndkey1 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[0] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesdec $rndkey0,$inout0 + pand $twmask,$twres # isolate carry and residue + aesdec $rndkey0,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesdec $rndkey0,$inout2 + pxor $twres,@tweak[5] + aesdec $rndkey0,$inout3 + aesdec $rndkey0,$inout4 + aesdec $rndkey0,$inout5 + $movkey 32($key),$rndkey0 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[1] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesdec $rndkey1,$inout0 + pand $twmask,$twres # isolate carry and residue + aesdec $rndkey1,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesdec $rndkey1,$inout2 + pxor $twres,@tweak[5] + aesdec $rndkey1,$inout3 + aesdec $rndkey1,$inout4 + aesdec $rndkey1,$inout5 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[2] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + aesdeclast $rndkey0,$inout0 + pand $twmask,$twres # isolate carry and residue + aesdeclast $rndkey0,$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + aesdeclast $rndkey0,$inout2 + pxor $twres,@tweak[5] + aesdeclast $rndkey0,$inout3 + aesdeclast $rndkey0,$inout4 + aesdeclast $rndkey0,$inout5 + + pshufd \$0x13,$twtmp,$twres + pxor $twtmp,$twtmp + movdqa @tweak[5],@tweak[3] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + xorps `16*0`(%rsp),$inout0 # output^=tweak + pand $twmask,$twres # isolate carry and residue + xorps `16*1`(%rsp),$inout1 + pcmpgtd @tweak[5],$twtmp # broadcat upper bits + pxor $twres,@tweak[5] + + xorps `16*2`(%rsp),$inout2 + movups $inout0,`16*0`($out) # write output + xorps `16*3`(%rsp),$inout3 + movups $inout1,`16*1`($out) + xorps `16*4`(%rsp),$inout4 + movups $inout2,`16*2`($out) + xorps `16*5`(%rsp),$inout5 + movups $inout3,`16*3`($out) + mov $rnds_,$rounds # restore $rounds + movups $inout4,`16*4`($out) + movups $inout5,`16*5`($out) + lea `16*6`($out),$out + sub \$16*6,$len + jnc .Lxts_dec_grandloop + + lea 3($rounds,$rounds),$rounds # restore original value + mov $key_,$key # restore $key + mov $rounds,$rnds_ # backup $rounds + +.Lxts_dec_short: + add \$16*6,$len + jz .Lxts_dec_done + + cmp \$0x20,$len + jb .Lxts_dec_one + je .Lxts_dec_two + + cmp \$0x40,$len + jb .Lxts_dec_three + je .Lxts_dec_four + + pshufd \$0x13,$twtmp,$twres + movdqa @tweak[5],@tweak[4] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqu ($inp),$inout0 + pand $twmask,$twres # isolate carry and residue + movdqu 16*1($inp),$inout1 + pxor $twres,@tweak[5] + + movdqu 16*2($inp),$inout2 + pxor @tweak[0],$inout0 + movdqu 16*3($inp),$inout3 + pxor @tweak[1],$inout1 + movdqu 16*4($inp),$inout4 + lea 16*5($inp),$inp + pxor @tweak[2],$inout2 + pxor @tweak[3],$inout3 + pxor @tweak[4],$inout4 + + call _aesni_decrypt6 + + xorps @tweak[0],$inout0 + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + movdqu $inout0,($out) + xorps @tweak[3],$inout3 + movdqu $inout1,16*1($out) + xorps @tweak[4],$inout4 + movdqu $inout2,16*2($out) + pxor $twtmp,$twtmp + movdqu $inout3,16*3($out) + pcmpgtd @tweak[5],$twtmp + movdqu $inout4,16*4($out) + lea 16*5($out),$out + pshufd \$0x13,$twtmp,@tweak[1] # $twres + and \$15,$len_ + jz .Lxts_dec_ret + + movdqa @tweak[5],@tweak[0] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + pand $twmask,@tweak[1] # isolate carry and residue + pxor @tweak[5],@tweak[1] + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups ($inp),$inout0 + lea 16*1($inp),$inp + xorps @tweak[0],$inout0 +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + xorps @tweak[0],$inout0 + movdqa @tweak[1],@tweak[0] + movups $inout0,($out) + movdqa @tweak[2],@tweak[1] + lea 16*1($out),$out + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups ($inp),$inout0 + movups 16($inp),$inout1 + lea 32($inp),$inp + xorps @tweak[0],$inout0 + xorps @tweak[1],$inout1 + + call _aesni_decrypt3 + + xorps @tweak[0],$inout0 + movdqa @tweak[2],@tweak[0] + xorps @tweak[1],$inout1 + movdqa @tweak[3],@tweak[1] + movups $inout0,($out) + movups $inout1,16*1($out) + lea 16*2($out),$out + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups ($inp),$inout0 + movups 16*1($inp),$inout1 + movups 16*2($inp),$inout2 + lea 16*3($inp),$inp + xorps @tweak[0],$inout0 + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + + call _aesni_decrypt3 + + xorps @tweak[0],$inout0 + movdqa @tweak[3],@tweak[0] + xorps @tweak[1],$inout1 + movdqa @tweak[5],@tweak[1] + xorps @tweak[2],$inout2 + movups $inout0,($out) + movups $inout1,16*1($out) + movups $inout2,16*2($out) + lea 16*3($out),$out + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + pshufd \$0x13,$twtmp,$twres + movdqa @tweak[5],@tweak[4] + paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movups ($inp),$inout0 + pand $twmask,$twres # isolate carry and residue + movups 16*1($inp),$inout1 + pxor $twres,@tweak[5] + + movups 16*2($inp),$inout2 + xorps @tweak[0],$inout0 + movups 16*3($inp),$inout3 + lea 16*4($inp),$inp + xorps @tweak[1],$inout1 + xorps @tweak[2],$inout2 + xorps @tweak[3],$inout3 + + call _aesni_decrypt4 + + xorps @tweak[0],$inout0 + movdqa @tweak[4],@tweak[0] + xorps @tweak[1],$inout1 + movdqa @tweak[5],@tweak[1] + xorps @tweak[2],$inout2 + movups $inout0,($out) + xorps @tweak[3],$inout3 + movups $inout1,16*1($out) + movups $inout2,16*2($out) + movups $inout3,16*3($out) + lea 16*4($out),$out + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + and \$15,$len_ + jz .Lxts_dec_ret +.Lxts_dec_done2: + mov $len_,$len + mov $key_,$key # restore $key + mov $rnds_,$rounds # restore $rounds + + movups ($inp),$inout0 + xorps @tweak[1],$inout0 +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + xorps @tweak[1],$inout0 + movups $inout0,($out) + +.Lxts_dec_steal: + movzb 16($inp),%eax # borrow $rounds ... + movzb ($out),%ecx # ... and $key + lea 1($inp),$inp + mov %al,($out) + mov %cl,16($out) + lea 1($out),$out + sub \$1,$len + jnz .Lxts_dec_steal + + sub $len_,$out # rewind $out + mov $key_,$key # restore $key + mov $rnds_,$rounds # restore $rounds + + movups ($out),$inout0 + xorps @tweak[0],$inout0 +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + xorps @tweak[0],$inout0 + movups $inout0,($out) + +.Lxts_dec_ret: +___ +$code.=<<___ if ($win64); + movaps 0x60(%rsp),%xmm6 + movaps 0x70(%rsp),%xmm7 + movaps 0x80(%rsp),%xmm8 + movaps 0x90(%rsp),%xmm9 + movaps 0xa0(%rsp),%xmm10 + movaps 0xb0(%rsp),%xmm11 + movaps 0xc0(%rsp),%xmm12 + movaps 0xd0(%rsp),%xmm13 + movaps 0xe0(%rsp),%xmm14 + movaps 0xf0(%rsp),%xmm15 +___ +$code.=<<___; + lea $frame_size(%rsp),%rsp +.Lxts_dec_epilogue: + ret +.size aesni_xts_decrypt,.-aesni_xts_decrypt +___ +} }} + +######################################################################## +# void $PREFIX_cbc_encrypt (const void *inp, void *out, +# size_t length, const AES_KEY *key, +# unsigned char *ivp,const int enc); +{ +my $reserved = $win64?0x40:-0x18; # used in decrypt +$code.=<<___; +.globl ${PREFIX}_cbc_encrypt +.type ${PREFIX}_cbc_encrypt,\@function,6 +.align 16 +${PREFIX}_cbc_encrypt: + test $len,$len # check length + jz .Lcbc_ret + + mov 240($key),$rnds_ # key->rounds + mov $key,$key_ # backup $key + test %r9d,%r9d # 6th argument + jz .Lcbc_decrypt +#--------------------------- CBC ENCRYPT ------------------------------# + movups ($ivp),$inout0 # load iv as initial state + mov $rnds_,$rounds + cmp \$16,$len + jb .Lcbc_enc_tail + sub \$16,$len + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups ($inp),$inout1 # load input + lea 16($inp),$inp + #xorps $inout1,$inout0 +___ + &aesni_generate1("enc",$key,$rounds,$inout0,$inout1); +$code.=<<___; + mov $rnds_,$rounds # restore $rounds + mov $key_,$key # restore $key + movups $inout0,0($out) # store output + lea 16($out),$out + sub \$16,$len + jnc .Lcbc_enc_loop + add \$16,$len + jnz .Lcbc_enc_tail + movups $inout0,($ivp) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + mov $len,%rcx # zaps $key + xchg $inp,$out # $inp is %rsi and $out is %rdi now + .long 0x9066A4F3 # rep movsb + mov \$16,%ecx # zero tail + sub $len,%rcx + xor %eax,%eax + .long 0x9066AAF3 # rep stosb + lea -16(%rdi),%rdi # rewind $out by 1 block + mov $rnds_,$rounds # restore $rounds + mov %rdi,%rsi # $inp and $out are the same + mov $key_,$key # restore $key + xor $len,$len # len=16 + jmp .Lcbc_enc_loop # one more spin + #--------------------------- CBC DECRYPT ------------------------------# +.align 16 +.Lcbc_decrypt: +___ +$code.=<<___ if ($win64); + lea -0x58(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) +.Lcbc_decrypt_body: +___ +$code.=<<___; + movups ($ivp),$iv + mov $rnds_,$rounds + cmp \$0x70,$len + jbe .Lcbc_dec_tail + shr \$1,$rnds_ + sub \$0x70,$len + mov $rnds_,$rounds + movaps $iv,$reserved(%rsp) + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movaps $rndkey0,$reserved(%rsp) # save IV + movups $inout7,($out) + lea 0x10($out),$out +.Lcbc_dec_loop8_enter: + $movkey ($key),$rndkey0 + movups ($inp),$inout0 # load input + movups 0x10($inp),$inout1 + $movkey 16($key),$rndkey1 + + lea 32($key),$key + movdqu 0x20($inp),$inout2 + xorps $rndkey0,$inout0 + movdqu 0x30($inp),$inout3 + xorps $rndkey0,$inout1 + movdqu 0x40($inp),$inout4 + aesdec $rndkey1,$inout0 + pxor $rndkey0,$inout2 + movdqu 0x50($inp),$inout5 + aesdec $rndkey1,$inout1 + pxor $rndkey0,$inout3 + movdqu 0x60($inp),$inout6 + aesdec $rndkey1,$inout2 + pxor $rndkey0,$inout4 + movdqu 0x70($inp),$inout7 + aesdec $rndkey1,$inout3 + pxor $rndkey0,$inout5 + dec $rounds + aesdec $rndkey1,$inout4 + pxor $rndkey0,$inout6 + aesdec $rndkey1,$inout5 + pxor $rndkey0,$inout7 + $movkey ($key),$rndkey0 + aesdec $rndkey1,$inout6 + aesdec $rndkey1,$inout7 + $movkey 16($key),$rndkey1 + + call .Ldec_loop8_enter + + movups ($inp),$rndkey1 # re-load input + movups 0x10($inp),$rndkey0 + xorps $reserved(%rsp),$inout0 # ^= IV + xorps $rndkey1,$inout1 + movups 0x20($inp),$rndkey1 + xorps $rndkey0,$inout2 + movups 0x30($inp),$rndkey0 + xorps $rndkey1,$inout3 + movups 0x40($inp),$rndkey1 + xorps $rndkey0,$inout4 + movups 0x50($inp),$rndkey0 + xorps $rndkey1,$inout5 + movups 0x60($inp),$rndkey1 + xorps $rndkey0,$inout6 + movups 0x70($inp),$rndkey0 # IV + xorps $rndkey1,$inout7 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + mov $rnds_,$rounds # restore $rounds + movups $inout4,0x40($out) + mov $key_,$key # restore $key + movups $inout5,0x50($out) + lea 0x80($inp),$inp + movups $inout6,0x60($out) + lea 0x70($out),$out + sub \$0x80,$len + ja .Lcbc_dec_loop8 + + movaps $inout7,$inout0 + movaps $rndkey0,$iv + add \$0x70,$len + jle .Lcbc_dec_tail_collected + movups $inout0,($out) + lea 1($rnds_,$rnds_),$rounds + lea 0x10($out),$out +.Lcbc_dec_tail: + movups ($inp),$inout0 + movaps $inout0,$in0 + cmp \$0x10,$len + jbe .Lcbc_dec_one + + movups 0x10($inp),$inout1 + movaps $inout1,$in1 + cmp \$0x20,$len + jbe .Lcbc_dec_two + + movups 0x20($inp),$inout2 + movaps $inout2,$in2 + cmp \$0x30,$len + jbe .Lcbc_dec_three + + movups 0x30($inp),$inout3 + cmp \$0x40,$len + jbe .Lcbc_dec_four + + movups 0x40($inp),$inout4 + cmp \$0x50,$len + jbe .Lcbc_dec_five + + movups 0x50($inp),$inout5 + cmp \$0x60,$len + jbe .Lcbc_dec_six + + movups 0x60($inp),$inout6 + movaps $iv,$reserved(%rsp) # save IV + call _aesni_decrypt8 + movups ($inp),$rndkey1 + movups 0x10($inp),$rndkey0 + xorps $reserved(%rsp),$inout0 # ^= IV + xorps $rndkey1,$inout1 + movups 0x20($inp),$rndkey1 + xorps $rndkey0,$inout2 + movups 0x30($inp),$rndkey0 + xorps $rndkey1,$inout3 + movups 0x40($inp),$rndkey1 + xorps $rndkey0,$inout4 + movups 0x50($inp),$rndkey0 + xorps $rndkey1,$inout5 + movups 0x60($inp),$iv # IV + xorps $rndkey0,$inout6 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + movups $inout5,0x50($out) + lea 0x60($out),$out + movaps $inout6,$inout0 + sub \$0x70,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_one: +___ + &aesni_generate1("dec",$key,$rounds); +$code.=<<___; + xorps $iv,$inout0 + movaps $in0,$iv + sub \$0x10,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + xorps $inout2,$inout2 + call _aesni_decrypt3 + xorps $iv,$inout0 + xorps $in0,$inout1 + movups $inout0,($out) + movaps $in1,$iv + movaps $inout1,$inout0 + lea 0x10($out),$out + sub \$0x20,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + call _aesni_decrypt3 + xorps $iv,$inout0 + xorps $in0,$inout1 + movups $inout0,($out) + xorps $in1,$inout2 + movups $inout1,0x10($out) + movaps $in2,$iv + movaps $inout2,$inout0 + lea 0x20($out),$out + sub \$0x30,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + call _aesni_decrypt4 + xorps $iv,$inout0 + movups 0x30($inp),$iv + xorps $in0,$inout1 + movups $inout0,($out) + xorps $in1,$inout2 + movups $inout1,0x10($out) + xorps $in2,$inout3 + movups $inout2,0x20($out) + movaps $inout3,$inout0 + lea 0x30($out),$out + sub \$0x40,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_five: + xorps $inout5,$inout5 + call _aesni_decrypt6 + movups 0x10($inp),$rndkey1 + movups 0x20($inp),$rndkey0 + xorps $iv,$inout0 + xorps $in0,$inout1 + xorps $rndkey1,$inout2 + movups 0x30($inp),$rndkey1 + xorps $rndkey0,$inout3 + movups 0x40($inp),$iv + xorps $rndkey1,$inout4 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + lea 0x40($out),$out + movaps $inout4,$inout0 + sub \$0x50,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 0x10($inp),$rndkey1 + movups 0x20($inp),$rndkey0 + xorps $iv,$inout0 + xorps $in0,$inout1 + xorps $rndkey1,$inout2 + movups 0x30($inp),$rndkey1 + xorps $rndkey0,$inout3 + movups 0x40($inp),$rndkey0 + xorps $rndkey1,$inout4 + movups 0x50($inp),$iv + xorps $rndkey0,$inout5 + movups $inout0,($out) + movups $inout1,0x10($out) + movups $inout2,0x20($out) + movups $inout3,0x30($out) + movups $inout4,0x40($out) + lea 0x50($out),$out + movaps $inout5,$inout0 + sub \$0x60,$len + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_tail_collected: + and \$15,$len + movups $iv,($ivp) + jnz .Lcbc_dec_tail_partial + movups $inout0,($out) + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps $inout0,$reserved(%rsp) + mov \$16,%rcx + mov $out,%rdi + sub $len,%rcx + lea $reserved(%rsp),%rsi + .long 0x9066A4F3 # rep movsb + +.Lcbc_dec_ret: +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + lea 0x58(%rsp),%rsp +___ +$code.=<<___; +.Lcbc_ret: + ret +.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt +___ +} +# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, +# int bits, AES_KEY *key) +{ my ($inp,$bits,$key) = @_4args; + $bits =~ s/%r/%e/; + +$code.=<<___; +.globl ${PREFIX}_set_decrypt_key +.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent +.align 16 +${PREFIX}_set_decrypt_key: + .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 + call __aesni_set_encrypt_key + shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key + test %eax,%eax + jnz .Ldec_key_ret + lea 16($key,$bits),$inp # points at the end of key schedule + + $movkey ($key),%xmm0 # just swap + $movkey ($inp),%xmm1 + $movkey %xmm0,($inp) + $movkey %xmm1,($key) + lea 16($key),$key + lea -16($inp),$inp + +.Ldec_key_inverse: + $movkey ($key),%xmm0 # swap and inverse + $movkey ($inp),%xmm1 + aesimc %xmm0,%xmm0 + aesimc %xmm1,%xmm1 + lea 16($key),$key + lea -16($inp),$inp + $movkey %xmm0,16($inp) + $movkey %xmm1,-16($key) + cmp $key,$inp + ja .Ldec_key_inverse + + $movkey ($key),%xmm0 # inverse middle + aesimc %xmm0,%xmm0 + $movkey %xmm0,($inp) +.Ldec_key_ret: + add \$8,%rsp + ret +.LSEH_end_set_decrypt_key: +.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key +___ + +# This is based on submission by +# +# Huang Ying +# Vinodh Gopal +# Kahraman Akdemir +# +# Agressively optimized in respect to aeskeygenassist's critical path +# and is contained in %xmm0-5 to meet Win64 ABI requirement. +# +$code.=<<___; +.globl ${PREFIX}_set_encrypt_key +.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent +.align 16 +${PREFIX}_set_encrypt_key: +__aesni_set_encrypt_key: + .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 + mov \$-1,%rax + test $inp,$inp + jz .Lenc_key_ret + test $key,$key + jz .Lenc_key_ret + + movups ($inp),%xmm0 # pull first 128 bits of *userKey + xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0 + lea 16($key),%rax + cmp \$256,$bits + je .L14rounds + cmp \$192,$bits + je .L12rounds + cmp \$128,$bits + jne .Lbad_keybits + +.L10rounds: + mov \$9,$bits # 10 rounds for 128-bit key + $movkey %xmm0,($key) # round 0 + aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 + call .Lkey_expansion_128_cold + aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 + call .Lkey_expansion_128 + aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 + call .Lkey_expansion_128 + aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 + call .Lkey_expansion_128 + aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 + call .Lkey_expansion_128 + aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 + call .Lkey_expansion_128 + aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 + call .Lkey_expansion_128 + aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 + call .Lkey_expansion_128 + aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 + call .Lkey_expansion_128 + aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10 + call .Lkey_expansion_128 + $movkey %xmm0,(%rax) + mov $bits,80(%rax) # 240(%rdx) + xor %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16($inp),%xmm2 # remaining 1/3 of *userKey + mov \$11,$bits # 12 rounds for 192 + $movkey %xmm0,($key) # round 0 + aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 + call .Lkey_expansion_192a_cold + aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 + call .Lkey_expansion_192b + aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 + call .Lkey_expansion_192a + aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 + call .Lkey_expansion_192b + aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 + call .Lkey_expansion_192a + aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 + call .Lkey_expansion_192b + aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 + call .Lkey_expansion_192a + aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 + call .Lkey_expansion_192b + $movkey %xmm0,(%rax) + mov $bits,48(%rax) # 240(%rdx) + xor %rax, %rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16($inp),%xmm2 # remaning half of *userKey + mov \$13,$bits # 14 rounds for 256 + lea 16(%rax),%rax + $movkey %xmm0,($key) # round 0 + $movkey %xmm2,16($key) # round 1 + aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 + call .Lkey_expansion_256a_cold + aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 + call .Lkey_expansion_256b + aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 + call .Lkey_expansion_256a + aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 + call .Lkey_expansion_256b + aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 + call .Lkey_expansion_256a + aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 + call .Lkey_expansion_256b + aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 + call .Lkey_expansion_256a + aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 + call .Lkey_expansion_256b + aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 + call .Lkey_expansion_256a + aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 + call .Lkey_expansion_256b + aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 + call .Lkey_expansion_256a + aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 + call .Lkey_expansion_256b + aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 + call .Lkey_expansion_256a + $movkey %xmm0,(%rax) + mov $bits,16(%rax) # 240(%rdx) + xor %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + mov \$-2,%rax +.Lenc_key_ret: + add \$8,%rsp + ret +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + $movkey %xmm0,(%rax) + lea 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps \$0b00010000,%xmm0,%xmm4 + xorps %xmm4, %xmm0 + shufps \$0b10001100,%xmm0,%xmm4 + xorps %xmm4, %xmm0 + shufps \$0b11111111,%xmm1,%xmm1 # critical path + xorps %xmm1,%xmm0 + ret + +.align 16 +.Lkey_expansion_192a: + $movkey %xmm0,(%rax) + lea 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2, %xmm5 +.Lkey_expansion_192b_warm: + shufps \$0b00010000,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps \$0b10001100,%xmm0,%xmm4 + pslldq \$4,%xmm3 + xorps %xmm4,%xmm0 + pshufd \$0b01010101,%xmm1,%xmm1 # critical path + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd \$0b11111111,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps \$0b01000100,%xmm0,%xmm5 + $movkey %xmm5,(%rax) + shufps \$0b01001110,%xmm2,%xmm3 + $movkey %xmm3,16(%rax) + lea 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + $movkey %xmm2,(%rax) + lea 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps \$0b00010000,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps \$0b10001100,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps \$0b11111111,%xmm1,%xmm1 # critical path + xorps %xmm1,%xmm0 + ret + +.align 16 +.Lkey_expansion_256b: + $movkey %xmm0,(%rax) + lea 16(%rax),%rax + + shufps \$0b00010000,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps \$0b10001100,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps \$0b10101010,%xmm1,%xmm1 # critical path + xorps %xmm1,%xmm2 + ret +.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +___ +} + +$code.=<<___; +.align 64 +.Lbswap_mask: + .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: + .long 6,6,6,0 +.Lincrement64: + .long 1,0,0,0 +.Lxts_magic: + .long 0x87,0,1,0 + +.asciz "AES for Intel AES-NI, CRYPTOGAMS by " +.align 64 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +___ +$code.=<<___ if ($PREFIX eq "aesni"); +.type ecb_se_handler,\@abi-omnipotent +.align 16 +ecb_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 152($context),%rax # pull context->Rsp + + jmp .Lcommon_seh_tail +.size ecb_se_handler,.-ecb_se_handler + +.type ccm64_se_handler,\@abi-omnipotent +.align 16 +ccm64_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + lea 0(%rax),%rsi # %xmm save area + lea 512($context),%rdi # &context.Xmm6 + mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0x58(%rax),%rax # adjust stack pointer + + jmp .Lcommon_seh_tail +.size ccm64_se_handler,.-ccm64_se_handler + +.type ctr32_se_handler,\@abi-omnipotent +.align 16 +ctr32_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lctr32_body(%rip),%r10 + cmp %r10,%rbx # context->Rip<"prologue" label + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + lea .Lctr32_ret(%rip),%r10 + cmp %r10,%rbx + jae .Lcommon_seh_tail + + lea 0x20(%rax),%rsi # %xmm save area + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0xc8(%rax),%rax # adjust stack pointer + + jmp .Lcommon_seh_tail +.size ctr32_se_handler,.-ctr32_se_handler + +.type xts_se_handler,\@abi-omnipotent +.align 16 +xts_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue lable + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + lea 0x60(%rax),%rsi # %xmm save area + lea 512($context),%rdi # & context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0x68+160(%rax),%rax # adjust stack pointer + + jmp .Lcommon_seh_tail +.size xts_se_handler,.-xts_se_handler +___ +$code.=<<___; +.type cbc_se_handler,\@abi-omnipotent +.align 16 +cbc_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 152($context),%rax # pull context->Rsp + mov 248($context),%rbx # pull context->Rip + + lea .Lcbc_decrypt(%rip),%r10 + cmp %r10,%rbx # context->Rip<"prologue" label + jb .Lcommon_seh_tail + + lea .Lcbc_decrypt_body(%rip),%r10 + cmp %r10,%rbx # context->RipRip>="epilogue" label + jae .Lcommon_seh_tail + + lea 0(%rax),%rsi # top of stack + lea 512($context),%rdi # &context.Xmm6 + mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0x58(%rax),%rax # adjust stack pointer + jmp .Lcommon_seh_tail + +.Lrestore_cbc_rax: + mov 120($context),%rax + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size cbc_se_handler,.-cbc_se_handler + +.section .pdata +.align 4 +___ +$code.=<<___ if ($PREFIX eq "aesni"); + .rva .LSEH_begin_aesni_ecb_encrypt + .rva .LSEH_end_aesni_ecb_encrypt + .rva .LSEH_info_ecb + + .rva .LSEH_begin_aesni_ccm64_encrypt_blocks + .rva .LSEH_end_aesni_ccm64_encrypt_blocks + .rva .LSEH_info_ccm64_enc + + .rva .LSEH_begin_aesni_ccm64_decrypt_blocks + .rva .LSEH_end_aesni_ccm64_decrypt_blocks + .rva .LSEH_info_ccm64_dec + + .rva .LSEH_begin_aesni_ctr32_encrypt_blocks + .rva .LSEH_end_aesni_ctr32_encrypt_blocks + .rva .LSEH_info_ctr32 + + .rva .LSEH_begin_aesni_xts_encrypt + .rva .LSEH_end_aesni_xts_encrypt + .rva .LSEH_info_xts_enc + + .rva .LSEH_begin_aesni_xts_decrypt + .rva .LSEH_end_aesni_xts_decrypt + .rva .LSEH_info_xts_dec +___ +$code.=<<___; + .rva .LSEH_begin_${PREFIX}_cbc_encrypt + .rva .LSEH_end_${PREFIX}_cbc_encrypt + .rva .LSEH_info_cbc + + .rva ${PREFIX}_set_decrypt_key + .rva .LSEH_end_set_decrypt_key + .rva .LSEH_info_key + + .rva ${PREFIX}_set_encrypt_key + .rva .LSEH_end_set_encrypt_key + .rva .LSEH_info_key +.section .xdata +.align 8 +___ +$code.=<<___ if ($PREFIX eq "aesni"); +.LSEH_info_ecb: + .byte 9,0,0,0 + .rva ecb_se_handler +.LSEH_info_ccm64_enc: + .byte 9,0,0,0 + .rva ccm64_se_handler + .rva .Lccm64_enc_body,.Lccm64_enc_ret # HandlerData[] +.LSEH_info_ccm64_dec: + .byte 9,0,0,0 + .rva ccm64_se_handler + .rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[] +.LSEH_info_ctr32: + .byte 9,0,0,0 + .rva ctr32_se_handler +.LSEH_info_xts_enc: + .byte 9,0,0,0 + .rva xts_se_handler + .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[] +.LSEH_info_xts_dec: + .byte 9,0,0,0 + .rva xts_se_handler + .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[] +___ +$code.=<<___; +.LSEH_info_cbc: + .byte 9,0,0,0 + .rva cbc_se_handler +.LSEH_info_key: + .byte 0x01,0x04,0x01,0x00 + .byte 0x04,0x02,0x00,0x00 # sub rsp,8 +___ +} + +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + push @opcode,$rex|0x40 if($rex); +} + +sub aesni { + my $line=shift; + my @opcode=(0x66); + + if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + rex(\@opcode,$4,$3); + push @opcode,0x0f,0x3a,0xdf; + push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M + my $c=$2; + push @opcode,$c=~/^0/?oct($c):$c; + return ".byte\t".join(',',@opcode); + } + elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my %opcodelet = ( + "aesimc" => 0xdb, + "aesenc" => 0xdc, "aesenclast" => 0xdd, + "aesdec" => 0xde, "aesdeclast" => 0xdf + ); + return undef if (!defined($opcodelet{$1})); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x38,$opcodelet{$1}; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } + return $line; +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/bsaes-armv7.S b/app/openssl/crypto/aes/asm/bsaes-armv7.S new file mode 100644 index 00000000..64205d45 --- /dev/null +++ b/app/openssl/crypto/aes/asm/bsaes-armv7.S @@ -0,0 +1,2544 @@ + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ . Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. +@ +@ + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! + mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/app/openssl/crypto/aes/asm/bsaes-armv7.pl b/app/openssl/crypto/aes/asm/bsaes-armv7.pl new file mode 100644 index 00000000..f3d96d93 --- /dev/null +++ b/app/openssl/crypto/aes/asm/bsaes-armv7.pl @@ -0,0 +1,2467 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Specific modes and adaptation for Linux kernel by Ard Biesheuvel +# . Permission to use under GPL terms is +# granted. +# ==================================================================== + +# Bit-sliced AES for ARM NEON +# +# February 2012. +# +# This implementation is direct adaptation of bsaes-x86_64 module for +# ARM NEON. Except that this module is endian-neutral [in sense that +# it can be compiled for either endianness] by courtesy of vld1.8's +# neutrality. Initial version doesn't implement interface to OpenSSL, +# only low-level primitives and unsupported entry points, just enough +# to collect performance results, which for Cortex-A8 core are: +# +# encrypt 19.5 cycles per byte processed with 128-bit key +# decrypt 22.1 cycles per byte processed with 128-bit key +# key conv. 440 cycles per 128-bit key/0.18 of 8x block +# +# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +# which is [much] worse than anticipated (for further details see +# http://www.openssl.org/~appro/Snapdragon-S4.html). +# +# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +# manages in 20.0 cycles]. +# +# When comparing to x86_64 results keep in mind that NEON unit is +# [mostly] single-issue and thus can't [fully] benefit from +# instruction-level parallelism. And when comparing to aes-armv4 +# results keep in mind key schedule conversion overhead (see +# bsaes-x86_64.pl for further details)... +# +# + +# April-August 2013 +# +# Add CBC, CTR and XTS subroutines, adapt for kernel use. +# +# + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); +my @XMM=map("q$_",(0..15)); + +{ +my ($key,$rounds,$const)=("r4","r5","r6"); + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[2], @b[2], @b[1] + veor @b[5], @b[5], @b[6] + veor @b[3], @b[3], @b[0] + veor @b[6], @b[6], @b[2] + veor @b[5], @b[5], @b[0] + + veor @b[6], @b[6], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[4], @b[4], @b[5] + + veor @b[2], @b[2], @b[7] + veor @b[3], @b[3], @b[1] + veor @b[1], @b[1], @b[5] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] + veor @b[4], @b[4], @b[6] + veor @b[2], @b[2], @b[0] + veor @b[6], @b[6], @b[1] + + veor @b[1], @b[1], @b[5] + veor @b[5], @b[5], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[2], @b[2], @b[5] + + veor @b[4], @b[4], @b[7] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse (with twist) +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + veor @b[1], @b[1], @b[7] + veor @b[4], @b[4], @b[7] + + veor @b[7], @b[7], @b[5] + veor @b[1], @b[1], @b[3] + veor @b[2], @b[2], @b[5] + veor @b[3], @b[3], @b[7] + + veor @b[6], @b[6], @b[1] + veor @b[2], @b[2], @b[0] + veor @b[5], @b[5], @b[3] + veor @b[4], @b[4], @b[6] + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + veor @b[1], @b[1], @b[5] + veor @b[2], @b[2], @b[7] + + veor @b[3], @b[3], @b[1] + veor @b[4], @b[4], @b[5] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[5], @b[5], @b[0] + veor @b[3], @b[3], @b[7] + veor @b[6], @b[6], @b[2] + veor @b[2], @b[2], @b[1] + veor @b[6], @b[6], @b[3] + + veor @b[3], @b[3], @b[0] + veor @b[5], @b[5], @b[6] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $t1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $t1, $t0 + veor $x0, $x0, $t1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $x1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $x1, $x0 + veor $x0, $x0, $t0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + veor $t1, $y2, $y3 + vand $t0, $t0, $x0 + vand $t1, $t1, $x2 + veor $x0, $x0, $x1 + veor $x2, $x2, $x3 + vand $x1, $x1, $y0 + vand $x3, $x3, $y2 + vand $x0, $x0, $y1 + vand $x2, $x2, $y3 + veor $x1, $x1, $x0 + veor $x2, $x2, $x3 + veor $x0, $x0, $t0 + veor $x3, $x3, $t1 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + veor @t[0], @x[0], @x[2] + veor @t[1], @x[1], @x[3] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @x[0], @x[0], @t[0] + veor @x[2], @x[2], @t[0] + veor @x[1], @x[1], @t[1] + veor @x[3], @x[3], @t[1] + + veor @t[0], @x[4], @x[6] + veor @t[1], @x[5], @x[7] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @x[4], @x[4], @t[0] + veor @x[6], @x[6], @t[0] + veor @x[5], @x[5], @t[1] + veor @x[7], @x[7], @t[1] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + veor @t[3], @x[4], @x[6] + veor @t[2], @x[5], @x[7] + veor @t[1], @x[1], @x[3] + veor @s[1], @x[7], @x[6] + vmov @t[0], @t[2] + veor @s[0], @x[0], @x[2] + + vorr @t[2], @t[2], @t[1] + veor @s[3], @t[3], @t[0] + vand @s[2], @t[3], @s[0] + vorr @t[3], @t[3], @s[0] + veor @s[0], @s[0], @t[1] + vand @t[0], @t[0], @t[1] + veor @t[1], @x[3], @x[2] + vand @s[3], @s[3], @s[0] + vand @s[1], @s[1], @t[1] + veor @t[1], @x[4], @x[5] + veor @s[0], @x[1], @x[0] + veor @t[3], @t[3], @s[1] + veor @t[2], @t[2], @s[1] + vand @s[1], @t[1], @s[0] + vorr @t[1], @t[1], @s[0] + veor @t[3], @t[3], @s[3] + veor @t[0], @t[0], @s[1] + veor @t[2], @t[2], @s[2] + veor @t[1], @t[1], @s[3] + veor @t[0], @t[0], @s[2] + vand @s[0], @x[7], @x[3] + veor @t[1], @t[1], @s[2] + vand @s[1], @x[6], @x[2] + vand @s[2], @x[5], @x[1] + vorr @s[3], @x[4], @x[0] + veor @t[3], @t[3], @s[0] + veor @t[1], @t[1], @s[2] + veor @t[0], @t[0], @s[3] + veor @t[2], @t[2], @s[1] + + @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + @ new smaller inversion + + vand @s[2], @t[3], @t[1] + vmov @s[0], @t[0] + + veor @s[1], @t[2], @s[2] + veor @s[3], @t[0], @s[2] + veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] + + vbsl @s[1], @t[1], @t[0] + vbsl @s[3], @t[3], @t[2] + veor @t[3], @t[3], @t[2] + + vbsl @s[0], @s[1], @s[2] + vbsl @t[0], @s[2], @s[1] + + vand @s[2], @s[0], @s[3] + veor @t[1], @t[1], @t[0] + + veor @s[2], @s[2], @t[3] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my @t=@_[8..11]; +my $mask=pop; +$code.=<<___; + vldmia $key!, {@t[0]-@t[3]} + veor @t[0], @t[0], @x[0] + veor @t[1], @t[1], @x[1] + vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` + vldmia $key!, {@t[0]} + veor @t[2], @t[2], @x[2] + vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` + vldmia $key!, {@t[1]} + veor @t[3], @t[3], @x[3] + vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` + vldmia $key!, {@t[2]} + vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` + vldmia $key!, {@t[3]} + veor @t[0], @t[0], @x[4] + veor @t[1], @t[1], @x[5] + vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` + veor @t[2], @t[2], @x[6] + vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` + veor @t[3], @t[3], @x[7] + vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` + vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) + vext.8 @t[2], @x[2], @x[2], #12 + veor @x[1], @x[1], @t[1] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[2], @x[2], @t[2] + vext.8 @t[4], @x[4], @x[4], #12 + veor @x[3], @x[3], @t[3] + vext.8 @t[5], @x[5], @x[5], #12 + veor @x[4], @x[4], @t[4] + vext.8 @t[6], @x[6], @x[6], #12 + veor @x[5], @x[5], @t[5] + vext.8 @t[7], @x[7], @x[7], #12 + veor @x[6], @x[6], @t[6] + + veor @t[1], @t[1], @x[0] + veor @x[7], @x[7], @t[7] + vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor @t[2], @t[2], @x[1] + veor @t[0], @t[0], @x[7] + veor @t[1], @t[1], @x[7] + vext.8 @x[1], @x[1], @x[1], #8 + veor @t[5], @t[5], @x[4] + veor @x[0], @x[0], @t[0] + veor @t[6], @t[6], @x[5] + veor @x[1], @x[1], @t[1] + vext.8 @t[0], @x[4], @x[4], #8 + veor @t[4], @t[4], @x[3] + vext.8 @t[1], @x[5], @x[5], #8 + veor @t[7], @t[7], @x[6] + vext.8 @x[4], @x[3], @x[3], #8 + veor @t[3], @t[3], @x[2] + vext.8 @x[5], @x[7], @x[7], #8 + veor @t[4], @t[4], @x[7] + vext.8 @x[3], @x[6], @x[6], #8 + veor @t[3], @t[3], @x[7] + vext.8 @x[6], @x[2], @x[2], #8 + veor @x[7], @t[1], @t[5] +___ +$code.=<<___ if (!$inv); + veor @x[2], @t[0], @t[4] + veor @x[4], @x[4], @t[3] + veor @x[5], @x[5], @t[7] + veor @x[3], @x[3], @t[6] + @ vmov @x[2], @t[0] + veor @x[6], @x[6], @t[2] + @ vmov @x[7], @t[1] +___ +$code.=<<___ if ($inv); + veor @t[3], @t[3], @x[4] + veor @x[5], @x[5], @t[7] + veor @x[2], @x[3], @t[6] + veor @x[3], @t[0], @t[4] + veor @x[4], @x[6], @t[2] + vmov @x[6], @t[3] + @ vmov @x[7], @t[1] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + @ multiplication by 0x0e + vext.8 @t[7], @x[7], @x[7], #12 + vmov @t[2], @x[2] + veor @x[2], @x[2], @x[5] @ 2 5 + veor @x[7], @x[7], @x[5] @ 7 5 + vext.8 @t[0], @x[0], @x[0], #12 + vmov @t[5], @x[5] + veor @x[5], @x[5], @x[0] @ 5 0 [1] + veor @x[0], @x[0], @x[1] @ 0 1 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[1], @x[1], @x[2] @ 1 25 + veor @x[0], @x[0], @x[6] @ 01 6 [2] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[1], @x[1], @x[3] @ 125 3 [4] + veor @x[2], @x[2], @x[0] @ 25 016 [3] + veor @x[3], @x[3], @x[7] @ 3 75 + veor @x[7], @x[7], @x[6] @ 75 6 [0] + vext.8 @t[6], @x[6], @x[6], #12 + vmov @t[4], @x[4] + veor @x[6], @x[6], @x[4] @ 6 4 + veor @x[4], @x[4], @x[3] @ 4 375 [6] + veor @x[3], @x[3], @x[7] @ 375 756=36 + veor @x[6], @x[6], @t[5] @ 64 5 [7] + veor @x[3], @x[3], @t[2] @ 36 2 + vext.8 @t[5], @t[5], @t[5], #12 + veor @x[3], @x[3], @t[4] @ 362 4 [5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + @ multiplication by 0x0b + veor @y[1], @y[1], @y[0] + veor @y[0], @y[0], @t[0] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[5] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[1], @y[1], @t[6] + veor @y[0], @y[0], @t[7] + veor @t[7], @t[7], @t[6] @ clobber t[7] + + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @y[0] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[2], @y[2], @t[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + veor @y[2], @y[2], @t[2] + veor @y[3], @y[3], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[7] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[3], @y[3], @t[3] + veor @y[6], @y[6], @t[3] + veor @y[4], @y[4], @t[3] + veor @y[7], @y[7], @t[4] + vext.8 @t[3], @t[3], @t[3], #12 + veor @y[5], @y[5], @t[4] + veor @y[7], @y[7], @t[7] + veor @t[7], @t[7], @t[5] @ clobber t[7] even more + veor @y[3], @y[3], @t[5] + veor @y[4], @y[4], @t[4] + + veor @y[5], @y[5], @t[7] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[6], @y[6], @t[7] + veor @y[4], @y[4], @t[7] + + veor @t[7], @t[7], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + + @ multiplication by 0x0d + veor @y[4], @y[4], @y[7] + veor @t[7], @t[7], @t[6] @ restore t[7] + veor @y[7], @y[7], @t[4] + vext.8 @t[6], @t[6], @t[6], #12 + veor @y[2], @y[2], @t[0] + veor @y[7], @y[7], @t[5] + vext.8 @t[7], @t[7], @t[7], #12 + veor @y[2], @y[2], @t[2] + + veor @y[3], @y[3], @y[1] + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[0] + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @t[5] + veor @y[0], @y[0], @t[5] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[1], @y[1], @t[7] + veor @y[0], @y[0], @t[6] + veor @y[3], @y[3], @y[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + + veor @y[7], @y[7], @t[7] + veor @y[4], @y[4], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[6] + veor @t[6], @t[6], @t[3] @ clobber t[6] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[4], @y[4], @y[7] + veor @y[3], @y[3], @t[6] + + veor @y[6], @y[6], @t[6] + veor @y[5], @y[5], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + veor @y[6], @y[6], @t[4] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[5], @y[5], @t[6] + veor @y[6], @y[6], @t[7] + vext.8 @t[7], @t[7], @t[7], #12 + veor @t[6], @t[6], @t[3] @ restore t[6] + vext.8 @t[3], @t[3], @t[3], #12 + + @ multiplication by 0x09 + veor @y[4], @y[4], @y[1] + veor @t[1], @t[1], @y[1] @ t[1]=y[1] + veor @t[0], @t[0], @t[5] @ clobber t[0] + vext.8 @t[6], @t[6], @t[6], #12 + veor @t[1], @t[1], @t[5] + veor @y[3], @y[3], @t[0] + veor @t[0], @t[0], @y[0] @ t[0]=y[0] + veor @t[1], @t[1], @t[6] + veor @t[6], @t[6], @t[7] @ clobber t[6] + veor @y[4], @y[4], @t[1] + veor @y[7], @y[7], @t[4] + veor @y[6], @y[6], @t[3] + veor @y[5], @y[5], @t[2] + veor @t[4], @t[4], @y[4] @ t[4]=y[4] + veor @t[3], @t[3], @y[3] @ t[3]=y[3] + veor @t[5], @t[5], @y[5] @ t[5]=y[5] + veor @t[2], @t[2], @y[2] @ t[2]=y[2] + veor @t[3], @t[3], @t[7] + veor @XMM[5], @t[5], @t[6] + veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] + veor @XMM[2], @t[2], @t[6] + veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] + + vmov @XMM[0], @t[0] + vmov @XMM[1], @t[1] + @ vmov @XMM[2], @t[2] + vmov @XMM[3], @t[3] + vmov @XMM[4], @t[4] + @ vmov @XMM[5], @t[5] + @ vmov @XMM[6], @t[6] + @ vmov @XMM[7], @t[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 @t[0], @x[0], @x[0], #8 + vext.8 @t[6], @x[6], @x[6], #8 + vext.8 @t[7], @x[7], @x[7], #8 + veor @t[0], @t[0], @x[0] + vext.8 @t[1], @x[1], @x[1], #8 + veor @t[6], @t[6], @x[6] + vext.8 @t[2], @x[2], @x[2], #8 + veor @t[7], @t[7], @x[7] + vext.8 @t[3], @x[3], @x[3], #8 + veor @t[1], @t[1], @x[1] + vext.8 @t[4], @x[4], @x[4], #8 + veor @t[2], @t[2], @x[2] + vext.8 @t[5], @x[5], @x[5], #8 + veor @t[3], @t[3], @x[3] + veor @t[4], @t[4], @x[4] + veor @t[5], @t[5], @x[5] + + veor @x[0], @x[0], @t[6] + veor @x[1], @x[1], @t[6] + veor @x[2], @x[2], @t[0] + veor @x[4], @x[4], @t[2] + veor @x[3], @x[3], @t[1] + veor @x[1], @x[1], @t[7] + veor @x[2], @x[2], @t[7] + veor @x[4], @x[4], @t[6] + veor @x[5], @x[5], @t[3] + veor @x[3], @x[3], @t[6] + veor @x[6], @x[6], @t[4] + veor @x[4], @x[4], @t[7] + veor @x[5], @x[5], @t[7] + veor @x[7], @x[7], @t[5] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + vshr.u64 $t, $b, #$n + veor $t, $t, $a + vand $t, $t, $mask + veor $a, $a, $t + vshl.u64 $t, $t, #$n + veor $b, $b, $t +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + vshr.u64 $t0, $b0, #$n + vshr.u64 $t1, $b1, #$n + veor $t0, $t0, $a0 + veor $t1, $t1, $a1 + vand $t0, $t0, $mask + vand $t1, $t1, $mask + veor $a0, $a0, $t0 + vshl.u64 $t0, $t0, #$n + veor $a1, $a1, $t1 + vshl.u64 $t1, $t1, #$n + veor $b0, $b0, $t0 + veor $b1, $b1, $t1 +___ +} + +sub bitslice { +my @x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + vmov.i8 $t0,#0x55 @ compose .LBS0 + vmov.i8 $t1,#0x33 @ compose .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + vmov.i8 $t0,#0x0f @ compose .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr $const,_bsaes_decrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + add $const,$const,#.LM0ISR-_bsaes_decrypt8 + + vldmia $const!, {@XMM[8]} @ .LM0ISR + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq $const,$const,#0x10 + bne .Ldec_loop + vldmia $const, {@XMM[12]} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr $const,_bsaes_encrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + sub $const,$const,#_bsaes_encrypt8-.LM0SR + + vldmia $const!, {@XMM[8]} @ .LM0SR +_bsaes_encrypt8_alt: + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq $const,$const,#0x10 + bne .Lenc_loop + vldmia $const, {@XMM[12]} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + @ &swapmove(@x[2,3],1,$t0,$t2,$t3); + vmov @x[2], @x[0] + vmov @x[3], @x[1] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + vmov @x[4], @x[0] + vmov @x[6], @x[2] + vmov @x[5], @x[1] + vmov @x[7], @x[3] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr $const,_bsaes_key_convert + vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key + sub $const,$const,#_bsaes_key_convert-.LM0 + vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key + + vmov.i8 @XMM[8], #0x01 @ bit masks + vmov.i8 @XMM[9], #0x02 + vmov.i8 @XMM[10], #0x04 + vmov.i8 @XMM[11], #0x08 + vmov.i8 @XMM[12], #0x10 + vmov.i8 @XMM[13], #0x20 + vldmia $const, {@XMM[14]} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 @XMM[7], @XMM[7] + vrev32.8 @XMM[15], @XMM[15] +#endif + sub $rounds,$rounds,#1 + vstmia $out!, {@XMM[7]} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` + vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` + vmov.i8 @XMM[6], #0x40 + vmov.i8 @XMM[15], #0x80 + + vtst.8 @XMM[0], @XMM[7], @XMM[8] + vtst.8 @XMM[1], @XMM[7], @XMM[9] + vtst.8 @XMM[2], @XMM[7], @XMM[10] + vtst.8 @XMM[3], @XMM[7], @XMM[11] + vtst.8 @XMM[4], @XMM[7], @XMM[12] + vtst.8 @XMM[5], @XMM[7], @XMM[13] + vtst.8 @XMM[6], @XMM[7], @XMM[6] + vtst.8 @XMM[7], @XMM[7], @XMM[15] + vld1.8 {@XMM[15]}, [$inp]! @ load next round key + vmvn @XMM[0], @XMM[0] @ "pnot" + vmvn @XMM[1], @XMM[1] + vmvn @XMM[5], @XMM[5] + vmvn @XMM[6], @XMM[6] +#ifdef __ARMEL__ + vrev32.8 @XMM[15], @XMM[15] +#endif + subs $rounds,$rounds,#1 + vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 @XMM[7],#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,%function +.align 4 +bsaes_enc_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,%function +.align 4 +bsaes_encrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Lenc128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_encrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Lenc128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,%function +.align 4 +bsaes_dec_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + vldmia $out, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $out, {@XMM[7]} + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,%function +.align 4 +bsaes_decrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Ldec128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_decrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Ldec128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); +my ($keysched)=("sp"); + +$code.=<<___; +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp $len, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ivp, [ip] @ IV is 1st arg on the stack + mov $len, $len, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + vldmia $keysched, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $keysched, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: +#endif + + vld1.8 {@XMM[15]}, [$ivp] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs $len, $len, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5, $rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp] + sub $inp, $inp, #0x60 + vstmia $fp, {@XMM[15]} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + veor @XMM[5], @XMM[5], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[5]}, [$out]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds $len, $len, #8 + beq .Lcbc_dec_done + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + mov r5, $rounds + vstmia $fp, {@XMM[15]} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {@XMM[2]}, [$inp]! + cmp $len, #4 + blo .Lcbc_dec_three + vld1.8 {@XMM[3]}, [$inp]! + beq .Lcbc_dec_four + vld1.8 {@XMM[4]}, [$inp]! + cmp $len, #6 + blo .Lcbc_dec_five + vld1.8 {@XMM[5]}, [$inp]! + beq .Lcbc_dec_six + vld1.8 {@XMM[6]}, [$inp]! + sub $inp, $inp, #0x70 + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub $inp, $inp, #0x60 + bl _bsaes_decrypt8 + vldmia $fp,{@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub $inp, $inp, #0x50 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[2], @XMM[2], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub $inp, $inp, #0x40 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub $inp, $inp, #0x30 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub $inp, $inp, #0x20 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[1], @XMM[1], @XMM[8] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub $inp, $inp, #0x10 + mov $rounds, $out @ save original out pointer + mov $out, $fp @ use the iv scratch space as out buffer + mov r2, $key + vmov @XMM[4],@XMM[15] @ just in case ensure that IV + vmov @XMM[5],@XMM[0] @ and input are preserved + bl AES_decrypt + vld1.8 {@XMM[0]}, [$fp,:64] @ load result + veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV + vmov @XMM[15], @XMM[5] @ @XMM[5] holds input + vst1.8 {@XMM[0]}, [$rounds] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lcbc_dec_bzero +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + vst1.8 {@XMM[15]}, [$ivp] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); +my $const = "r6"; # shared with _bsaes_encrypt8_alt +my $keysched = "sp"; + +$code.=<<___; +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp $len, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ctr, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vld1.8 {@XMM[0]}, [$ctr] @ load counter + add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr + vldmia $keysched, {@XMM[4]} @ load round0 key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + +.align 2 +0: add r12, $key, #248 + vld1.8 {@XMM[0]}, [$ctr] @ load counter + adrl $ctr, .LREVM0SR @ borrow $ctr + vldmia r12, {@XMM[4]} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 @XMM[8],#1 @ compose 1<<96 + veor @XMM[9],@XMM[9],@XMM[9] + vrev32.8 @XMM[0],@XMM[0] + vext.8 @XMM[8],@XMM[9],@XMM[8],#4 + vrev32.8 @XMM[4],@XMM[4] + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vstmia $keysched, {@XMM[4]} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 + vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 + vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 + vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 + vadd.u32 @XMM[4], @XMM[1], @XMM[10] + vadd.u32 @XMM[5], @XMM[2], @XMM[10] + vadd.u32 @XMM[6], @XMM[3], @XMM[10] + vadd.u32 @XMM[7], @XMM[4], @XMM[10] + vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia $keysched, {@XMM[9]} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, $keysched, #0x10 @ pass next round key +#else + add r4, $key, #`248+16` +#endif + vldmia $ctr, {@XMM[8]} @ .LREVM0SR + mov r5, $rounds @ pass rounds + vstmia $fp, {@XMM[10]} @ save next counter + sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs $len, $len, #8 + blo .Lctr_enc_loop_done + + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[0], @XMM[8] + veor @XMM[1], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[10] + veor @XMM[6], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[7], @XMM[13] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + veor @XMM[5], @XMM[15] + vst1.8 {@XMM[6]}, [$out]! + vmov.i32 @XMM[8], #1 @ compose 1<<96 + vst1.8 {@XMM[3]}, [$out]! + veor @XMM[9], @XMM[9], @XMM[9] + vst1.8 {@XMM[7]}, [$out]! + vext.8 @XMM[8], @XMM[9], @XMM[8], #4 + vst1.8 {@XMM[2]}, [$out]! + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vst1.8 {@XMM[5]}, [$out]! + vldmia $fp, {@XMM[0]} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add $len, $len, #8 + vld1.8 {@XMM[8]}, [$inp]! @ load input + veor @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! @ write output + cmp $len, #2 + blo .Lctr_enc_done + vld1.8 {@XMM[9]}, [$inp]! + veor @XMM[1], @XMM[9] + vst1.8 {@XMM[1]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[4], @XMM[10] + vst1.8 {@XMM[4]}, [$out]! + cmp $len, #4 + blo .Lctr_enc_done + vld1.8 {@XMM[11]}, [$inp]! + veor @XMM[6], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[3]}, [$out]! + cmp $len, #6 + blo .Lctr_enc_done + vld1.8 {@XMM[13]}, [$inp]! + veor @XMM[7], @XMM[13] + vst1.8 {@XMM[7]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[14]}, [$inp] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[2]}, [$out]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lctr_enc_bzero +#else + vstmia $keysched, {q0-q1} +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, $inp @ copy arguments + mov r5, $out + mov r6, $len + mov r7, $key + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {@XMM[1]}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [r4]! @ load input + vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor @XMM[0],@XMM[0],@XMM[1] + vst1.8 {@XMM[0]}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +} +{ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); +my $const="r6"; # returned by _bsaes_key_convert +my $twmask=@XMM[5]; +my @T=@XMM[6..7]; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + subs $len, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds $len, #0x70 + bmi .Lxts_enc_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_enc_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_enc_ret + sub r6, $out, #0x10 + +.Lxts_enc_steal: + ldrb r0, [$inp], #1 + ldrb r1, [$out, #-0x10] + strb r0, [$out, #-0x10] + strb r1, [$out], #1 + + subs $len, #1 + bhi .Lxts_enc_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_enc_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + tst $len, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne $len, #0x10 @ subtract another 16 bytes + subs $len, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds $len, #0x70 + bmi .Lxts_dec_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_dec_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + mov r5, $magic @ preserve magic + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + mov $magic, r5 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia $magic, {$twmask} + vshr.s64 @XMM[6], @XMM[8], #63 + vand @XMM[6], @XMM[6], $twmask + vadd.u64 @XMM[9], @XMM[8], @XMM[8] + vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` + veor @XMM[9], @XMM[9], @XMM[6] + + @ perform the final decryption with the last tweak value + vld1.8 {@XMM[0]}, [$inp]! + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[9] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[9] + vst1.8 {@XMM[0]}, [$out] + + mov r6, $out +.Lxts_dec_steal: + ldrb r1, [$out] + ldrb r0, [$inp], #1 + strb r1, [$out, #0x10] + strb r0, [$out], #1 + + subs $len, #1 + bhi .Lxts_dec_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_dec_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/bsaes-x86_64.S b/app/openssl/crypto/aes/asm/bsaes-x86_64.S new file mode 100644 index 00000000..dc92d4da --- /dev/null +++ b/app/openssl/crypto/aes/asm/bsaes-x86_64.S @@ -0,0 +1,2498 @@ +.text + + + + +.type _bsaes_encrypt8,@function +.align 64 +_bsaes_encrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.align 16 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Lenc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.align 16 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,@function +.align 64 +_bsaes_decrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.align 16 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.align 16 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +.type _bsaes_key_convert,@function +.align 16 +_bsaes_key_convert: + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.align 16 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 +.size _bsaes_key_convert,.-_bsaes_key_convert + +.globl bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,@function +.align 16 +bsaes_cbc_encrypt: + cmpl $0,%r9d + jne asm_AES_cbc_encrypt + cmpq $128,%rdx + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lcbc_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,@function +.align 16 +bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 + leaq .LBS0(%rip),%r11 +.byte 102,15,56,0,247 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lctr_enc_epilogue: + .byte 0xf3,0xc3 +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,@function +.align 16 +bsaes_xts_encrypt: + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $128,%r14 + jz .Lxts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,@function +.align 16 +bsaes_xts_decrypt: + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $128,%r14 + jz .Lxts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +.type _bsaes_const,@object +.align 64 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.align 64 +.size _bsaes_const,.-_bsaes_const diff --git a/app/openssl/crypto/aes/asm/bsaes-x86_64.pl b/app/openssl/crypto/aes/asm/bsaes-x86_64.pl new file mode 100644 index 00000000..41b90f08 --- /dev/null +++ b/app/openssl/crypto/aes/asm/bsaes-x86_64.pl @@ -0,0 +1,3108 @@ +#!/usr/bin/env perl + +################################################################### +### AES-128 [originally in CTR mode] ### +### bitsliced implementation for Intel Core 2 processors ### +### requires support of SSE extensions up to SSSE3 ### +### Author: Emilia Käsper and Peter Schwabe ### +### Date: 2009-03-19 ### +### Public domain ### +### ### +### See http://homes.esat.kuleuven.be/~ekasper/#software for ### +### further information. ### +################################################################### +# +# September 2011. +# +# Started as transliteration to "perlasm" the original code has +# undergone following changes: +# +# - code was made position-independent; +# - rounds were folded into a loop resulting in >5x size reduction +# from 12.5KB to 2.2KB; +# - above was possibile thanks to mixcolumns() modification that +# allowed to feed its output back to aesenc[last], this was +# achieved at cost of two additional inter-registers moves; +# - some instruction reordering and interleaving; +# - this module doesn't implement key setup subroutine, instead it +# relies on conversion of "conventional" key schedule as returned +# by AES_set_encrypt_key (see discussion below); +# - first and last round keys are treated differently, which allowed +# to skip one shiftrows(), reduce bit-sliced key schedule and +# speed-up conversion by 22%; +# - support for 192- and 256-bit keys was added; +# +# Resulting performance in CPU cycles spent to encrypt one byte out +# of 4096-byte buffer with 128-bit key is: +# +# Emilia's this(*) difference +# +# Core 2 9.30 8.69 +7% +# Nehalem(**) 7.63 6.98 +9% +# Atom 17.1 17.4 -2%(***) +# +# (*) Comparison is not completely fair, because "this" is ECB, +# i.e. no extra processing such as counter values calculation +# and xor-ing input as in Emilia's CTR implementation is +# performed. However, the CTR calculations stand for not more +# than 1% of total time, so comparison is *rather* fair. +# +# (**) Results were collected on Westmere, which is considered to +# be equivalent to Nehalem for this code. +# +# (***) Slowdown on Atom is rather strange per se, because original +# implementation has a number of 9+-bytes instructions, which +# are bad for Atom front-end, and which I eliminated completely. +# In attempt to address deterioration sbox() was tested in FP +# SIMD "domain" (movaps instead of movdqa, xorps instead of +# pxor, etc.). While it resulted in nominal 4% improvement on +# Atom, it hurted Westmere by more than 2x factor. +# +# As for key schedule conversion subroutine. Interface to OpenSSL +# relies on per-invocation on-the-fly conversion. This naturally +# has impact on performance, especially for short inputs. Conversion +# time in CPU cycles and its ratio to CPU cycles spent in 8x block +# function is: +# +# conversion conversion/8x block +# Core 2 240 0.22 +# Nehalem 180 0.20 +# Atom 430 0.19 +# +# The ratio values mean that 128-byte blocks will be processed +# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%, +# etc. Then keep in mind that input sizes not divisible by 128 are +# *effectively* slower, especially shortest ones, e.g. consecutive +# 144-byte blocks are processed 44% slower than one would expect, +# 272 - 29%, 400 - 22%, etc. Yet, despite all these "shortcomings" +# it's still faster than ["hyper-threading-safe" code path in] +# aes-x86_64.pl on all lengths above 64 bytes... +# +# October 2011. +# +# Add decryption procedure. Performance in CPU cycles spent to decrypt +# one byte out of 4096-byte buffer with 128-bit key is: +# +# Core 2 9.83 +# Nehalem 7.74 +# Atom 19.0 +# +# November 2011. +# +# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is +# suboptimal, but XTS is meant to be used with larger blocks... +# +# + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); +my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) +my $ecb=0; # suppress unreferenced ECB subroutines, spare some space... + +{ +my ($key,$rounds,$const)=("%rax","%r10d","%r11"); + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + pxor @b[6], @b[5] + pxor @b[1], @b[2] + pxor @b[0], @b[3] + pxor @b[2], @b[6] + pxor @b[0], @b[5] + + pxor @b[3], @b[6] + pxor @b[7], @b[3] + pxor @b[5], @b[7] + pxor @b[4], @b[3] + pxor @b[5], @b[4] + pxor @b[1], @b[3] + + pxor @b[7], @b[2] + pxor @b[5], @b[1] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + pxor @b[6], @b[0] + pxor @b[4], @b[1] + pxor @b[0], @b[2] + pxor @b[6], @b[4] + pxor @b[1], @b[6] + + pxor @b[5], @b[1] + pxor @b[3], @b[5] + pxor @b[7], @b[3] + pxor @b[5], @b[7] + pxor @b[5], @b[2] + + pxor @b[7], @b[4] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + pxor @b[7], @b[4] + + pxor @b[5], @b[7] + pxor @b[5], @b[2] + pxor @b[7], @b[3] + pxor @b[3], @b[5] + pxor @b[5], @b[1] + + pxor @b[1], @b[6] + pxor @b[0], @b[2] + pxor @b[6], @b[4] + pxor @b[6], @b[0] + pxor @b[4], @b[1] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + pxor @b[5], @b[1] + pxor @b[7], @b[2] + + pxor @b[1], @b[3] + pxor @b[5], @b[4] + pxor @b[5], @b[7] + pxor @b[4], @b[3] + pxor @b[0], @b[5] + pxor @b[7], @b[3] + pxor @b[2], @b[6] + pxor @b[1], @b[2] + pxor @b[3], @b[6] + + pxor @b[0], @b[3] + pxor @b[6], @b[5] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + movdqa $y0, $t0 + pxor $y1, $t0 + pand $x0, $t0 + pxor $x1, $x0 + pand $y0, $x1 + pand $y1, $x0 + pxor $x1, $x0 + pxor $t0, $x1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + movdqa $y0, $t0 + pxor $y1, $t0 + pand $x0, $t0 + pxor $x1, $x0 + pand $y0, $x1 + pand $y1, $x0 + pxor $x0, $x1 + pxor $t0, $x0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + movdqa $y0, $t0 + movdqa $y2, $t1 + pxor $y1, $t0 + pxor $y3, $t1 + pand $x0, $t0 + pand $x2, $t1 + pxor $x1, $x0 + pxor $x3, $x2 + pand $y0, $x1 + pand $y2, $x3 + pand $y1, $x0 + pand $y3, $x2 + pxor $x0, $x1 + pxor $x3, $x2 + pxor $t0, $x0 + pxor $t1, $x3 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + movdqa @x[0], @t[0] + movdqa @x[1], @t[1] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]); +$code.=<<___; + pxor @x[2], @t[0] + pxor @x[3], @t[1] + pxor @y[2], @y[0] + pxor @y[3], @y[1] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + pxor @t[0], @x[0] + pxor @t[0], @x[2] + pxor @t[1], @x[1] + pxor @t[1], @x[3] + + movdqa @x[4], @t[0] + movdqa @x[5], @t[1] + pxor @x[6], @t[0] + pxor @x[7], @t[1] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + pxor @y[2], @y[0] + pxor @y[3], @y[1] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]); +$code.=<<___; + pxor @t[0], @x[4] + pxor @t[0], @x[6] + pxor @t[1], @x[5] + pxor @t[1], @x[7] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + movdqa @x[4], @t[3] + movdqa @x[5], @t[2] + movdqa @x[1], @t[1] + movdqa @x[7], @s[1] + movdqa @x[0], @s[0] + + pxor @x[6], @t[3] + pxor @x[7], @t[2] + pxor @x[3], @t[1] + movdqa @t[3], @s[2] + pxor @x[6], @s[1] + movdqa @t[2], @t[0] + pxor @x[2], @s[0] + movdqa @t[3], @s[3] + + por @t[1], @t[2] + por @s[0], @t[3] + pxor @t[0], @s[3] + pand @s[0], @s[2] + pxor @t[1], @s[0] + pand @t[1], @t[0] + pand @s[0], @s[3] + movdqa @x[3], @s[0] + pxor @x[2], @s[0] + pand @s[0], @s[1] + pxor @s[1], @t[3] + pxor @s[1], @t[2] + movdqa @x[4], @s[1] + movdqa @x[1], @s[0] + pxor @x[5], @s[1] + pxor @x[0], @s[0] + movdqa @s[1], @t[1] + pand @s[0], @s[1] + por @s[0], @t[1] + pxor @s[1], @t[0] + pxor @s[3], @t[3] + pxor @s[2], @t[2] + pxor @s[3], @t[1] + movdqa @x[7], @s[0] + pxor @s[2], @t[0] + movdqa @x[6], @s[1] + pxor @s[2], @t[1] + movdqa @x[5], @s[2] + pand @x[3], @s[0] + movdqa @x[4], @s[3] + pand @x[2], @s[1] + pand @x[1], @s[2] + por @x[0], @s[3] + pxor @s[0], @t[3] + pxor @s[1], @t[2] + pxor @s[2], @t[1] + pxor @s[3], @t[0] + + #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + # new smaller inversion + + movdqa @t[3], @s[0] + pand @t[1], @t[3] + pxor @t[2], @s[0] + + movdqa @t[0], @s[2] + movdqa @s[0], @s[3] + pxor @t[3], @s[2] + pand @s[2], @s[3] + + movdqa @t[1], @s[1] + pxor @t[2], @s[3] + pxor @t[0], @s[1] + + pxor @t[2], @t[3] + + pand @t[3], @s[1] + + movdqa @s[2], @t[2] + pxor @t[0], @s[1] + + pxor @s[1], @t[2] + pxor @s[1], @t[1] + + pand @t[0], @t[2] + + pxor @t[2], @s[2] + pxor @t[2], @t[1] + + pand @s[3], @s[2] + + pxor @s[0], @s[2] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my $mask=pop; +$code.=<<___; + pxor 0x00($key),@x[0] + pxor 0x10($key),@x[1] + pshufb $mask,@x[0] + pxor 0x20($key),@x[2] + pshufb $mask,@x[1] + pxor 0x30($key),@x[3] + pshufb $mask,@x[2] + pxor 0x40($key),@x[4] + pshufb $mask,@x[3] + pxor 0x50($key),@x[5] + pshufb $mask,@x[4] + pxor 0x60($key),@x[6] + pshufb $mask,@x[5] + pxor 0x70($key),@x[7] + pshufb $mask,@x[6] + lea 0x80($key),$key + pshufb $mask,@x[7] +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 + pshufd \$0x93, @x[1], @t[1] + pxor @t[0], @x[0] # x0 ^ (x0 <<< 32) + pshufd \$0x93, @x[2], @t[2] + pxor @t[1], @x[1] + pshufd \$0x93, @x[3], @t[3] + pxor @t[2], @x[2] + pshufd \$0x93, @x[4], @t[4] + pxor @t[3], @x[3] + pshufd \$0x93, @x[5], @t[5] + pxor @t[4], @x[4] + pshufd \$0x93, @x[6], @t[6] + pxor @t[5], @x[5] + pshufd \$0x93, @x[7], @t[7] + pxor @t[6], @x[6] + pxor @t[7], @x[7] + + pxor @x[0], @t[1] + pxor @x[7], @t[0] + pxor @x[7], @t[1] + pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64) + pxor @x[1], @t[2] + pshufd \$0x4E, @x[1], @x[1] + pxor @x[4], @t[5] + pxor @t[0], @x[0] + pxor @x[5], @t[6] + pxor @t[1], @x[1] + pxor @x[3], @t[4] + pshufd \$0x4E, @x[4], @t[0] + pxor @x[6], @t[7] + pshufd \$0x4E, @x[5], @t[1] + pxor @x[2], @t[3] + pshufd \$0x4E, @x[3], @x[4] + pxor @x[7], @t[3] + pshufd \$0x4E, @x[7], @x[5] + pxor @x[7], @t[4] + pshufd \$0x4E, @x[6], @x[3] + pxor @t[4], @t[0] + pshufd \$0x4E, @x[2], @x[6] + pxor @t[5], @t[1] +___ +$code.=<<___ if (!$inv); + pxor @t[3], @x[4] + pxor @t[7], @x[5] + pxor @t[6], @x[3] + movdqa @t[0], @x[2] + pxor @t[2], @x[6] + movdqa @t[1], @x[7] +___ +$code.=<<___ if ($inv); + pxor @x[4], @t[3] + pxor @t[7], @x[5] + pxor @x[3], @t[6] + movdqa @t[0], @x[3] + pxor @t[2], @x[6] + movdqa @t[6], @x[2] + movdqa @t[1], @x[7] + movdqa @x[6], @x[4] + movdqa @t[3], @x[6] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + # multiplication by 0x0e + pshufd \$0x93, @x[7], @t[7] + movdqa @x[2], @t[2] + pxor @x[5], @x[7] # 7 5 + pxor @x[5], @x[2] # 2 5 + pshufd \$0x93, @x[0], @t[0] + movdqa @x[5], @t[5] + pxor @x[0], @x[5] # 5 0 [1] + pxor @x[1], @x[0] # 0 1 + pshufd \$0x93, @x[1], @t[1] + pxor @x[2], @x[1] # 1 25 + pxor @x[6], @x[0] # 01 6 [2] + pxor @x[3], @x[1] # 125 3 [4] + pshufd \$0x93, @x[3], @t[3] + pxor @x[0], @x[2] # 25 016 [3] + pxor @x[7], @x[3] # 3 75 + pxor @x[6], @x[7] # 75 6 [0] + pshufd \$0x93, @x[6], @t[6] + movdqa @x[4], @t[4] + pxor @x[4], @x[6] # 6 4 + pxor @x[3], @x[4] # 4 375 [6] + pxor @x[7], @x[3] # 375 756=36 + pxor @t[5], @x[6] # 64 5 [7] + pxor @t[2], @x[3] # 36 2 + pxor @t[4], @x[3] # 362 4 [5] + pshufd \$0x93, @t[5], @t[5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + # multiplication by 0x0b + pxor @y[0], @y[1] + pxor @t[0], @y[0] + pxor @t[1], @y[1] + pshufd \$0x93, @t[2], @t[2] + pxor @t[5], @y[0] + pxor @t[6], @y[1] + pxor @t[7], @y[0] + pshufd \$0x93, @t[4], @t[4] + pxor @t[6], @t[7] # clobber t[7] + pxor @y[0], @y[1] + + pxor @t[0], @y[3] + pshufd \$0x93, @t[0], @t[0] + pxor @t[1], @y[2] + pxor @t[1], @y[4] + pxor @t[2], @y[2] + pshufd \$0x93, @t[1], @t[1] + pxor @t[2], @y[3] + pxor @t[2], @y[5] + pxor @t[7], @y[2] + pshufd \$0x93, @t[2], @t[2] + pxor @t[3], @y[3] + pxor @t[3], @y[6] + pxor @t[3], @y[4] + pshufd \$0x93, @t[3], @t[3] + pxor @t[4], @y[7] + pxor @t[4], @y[5] + pxor @t[7], @y[7] + pxor @t[5], @y[3] + pxor @t[4], @y[4] + pxor @t[5], @t[7] # clobber t[7] even more + + pxor @t[7], @y[5] + pshufd \$0x93, @t[4], @t[4] + pxor @t[7], @y[6] + pxor @t[7], @y[4] + + pxor @t[5], @t[7] + pshufd \$0x93, @t[5], @t[5] + pxor @t[6], @t[7] # restore t[7] + + # multiplication by 0x0d + pxor @y[7], @y[4] + pxor @t[4], @y[7] + pshufd \$0x93, @t[6], @t[6] + pxor @t[0], @y[2] + pxor @t[5], @y[7] + pxor @t[2], @y[2] + pshufd \$0x93, @t[7], @t[7] + + pxor @y[1], @y[3] + pxor @t[1], @y[1] + pxor @t[0], @y[0] + pxor @t[0], @y[3] + pxor @t[5], @y[1] + pxor @t[5], @y[0] + pxor @t[7], @y[1] + pshufd \$0x93, @t[0], @t[0] + pxor @t[6], @y[0] + pxor @y[1], @y[3] + pxor @t[1], @y[4] + pshufd \$0x93, @t[1], @t[1] + + pxor @t[7], @y[7] + pxor @t[2], @y[4] + pxor @t[2], @y[5] + pshufd \$0x93, @t[2], @t[2] + pxor @t[6], @y[2] + pxor @t[3], @t[6] # clobber t[6] + pxor @y[7], @y[4] + pxor @t[6], @y[3] + + pxor @t[6], @y[6] + pxor @t[5], @y[5] + pxor @t[4], @y[6] + pshufd \$0x93, @t[4], @t[4] + pxor @t[6], @y[5] + pxor @t[7], @y[6] + pxor @t[3], @t[6] # restore t[6] + + pshufd \$0x93, @t[5], @t[5] + pshufd \$0x93, @t[6], @t[6] + pshufd \$0x93, @t[7], @t[7] + pshufd \$0x93, @t[3], @t[3] + + # multiplication by 0x09 + pxor @y[1], @y[4] + pxor @y[1], @t[1] # t[1]=y[1] + pxor @t[5], @t[0] # clobber t[0] + pxor @t[5], @t[1] + pxor @t[0], @y[3] + pxor @y[0], @t[0] # t[0]=y[0] + pxor @t[6], @t[1] + pxor @t[7], @t[6] # clobber t[6] + pxor @t[1], @y[4] + pxor @t[4], @y[7] + pxor @y[4], @t[4] # t[4]=y[4] + pxor @t[3], @y[6] + pxor @y[3], @t[3] # t[3]=y[3] + pxor @t[2], @y[5] + pxor @y[2], @t[2] # t[2]=y[2] + pxor @t[7], @t[3] + pxor @y[5], @t[5] # t[5]=y[5] + pxor @t[6], @t[2] + pxor @t[6], @t[5] + pxor @y[6], @t[6] # t[6]=y[6] + pxor @y[7], @t[7] # t[7]=y[7] + + movdqa @t[0],@XMM[0] + movdqa @t[1],@XMM[1] + movdqa @t[2],@XMM[2] + movdqa @t[3],@XMM[3] + movdqa @t[4],@XMM[4] + movdqa @t[5],@XMM[5] + movdqa @t[6],@XMM[6] + movdqa @t[7],@XMM[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + # multiplication by 0x05-0x00-0x04-0x00 + pshufd \$0x4E, @x[0], @t[0] + pshufd \$0x4E, @x[6], @t[6] + pxor @x[0], @t[0] + pshufd \$0x4E, @x[7], @t[7] + pxor @x[6], @t[6] + pshufd \$0x4E, @x[1], @t[1] + pxor @x[7], @t[7] + pshufd \$0x4E, @x[2], @t[2] + pxor @x[1], @t[1] + pshufd \$0x4E, @x[3], @t[3] + pxor @x[2], @t[2] + pxor @t[6], @x[0] + pxor @t[6], @x[1] + pshufd \$0x4E, @x[4], @t[4] + pxor @x[3], @t[3] + pxor @t[0], @x[2] + pxor @t[1], @x[3] + pshufd \$0x4E, @x[5], @t[5] + pxor @x[4], @t[4] + pxor @t[7], @x[1] + pxor @t[2], @x[4] + pxor @x[5], @t[5] + + pxor @t[7], @x[2] + pxor @t[6], @x[3] + pxor @t[6], @x[4] + pxor @t[3], @x[5] + pxor @t[4], @x[6] + pxor @t[7], @x[4] + pxor @t[7], @x[5] + pxor @t[5], @x[7] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub aesenc { # not used +my @b=@_[0..7]; +my @t=@_[8..15]; +$code.=<<___; + movdqa 0x30($const),@t[0] # .LSR +___ + &ShiftRows (@b,@t[0]); + &Sbox (@b,@t); + &MixColumns (@b[0,1,4,6,3,7,2,5],@t); +} + +sub aesenclast { # not used +my @b=@_[0..7]; +my @t=@_[8..15]; +$code.=<<___; + movdqa 0x40($const),@t[0] # .LSRM0 +___ + &ShiftRows (@b,@t[0]); + &Sbox (@b,@t); +$code.=<<___ + pxor 0x00($key),@b[0] + pxor 0x10($key),@b[1] + pxor 0x20($key),@b[4] + pxor 0x30($key),@b[6] + pxor 0x40($key),@b[3] + pxor 0x50($key),@b[7] + pxor 0x60($key),@b[2] + pxor 0x70($key),@b[5] +___ +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + movdqa $b,$t + psrlq \$$n,$b + pxor $a,$b + pand $mask,$b + pxor $b,$a + psllq \$$n,$b + pxor $t,$b +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + movdqa $b0,$t0 + psrlq \$$n,$b0 + movdqa $b1,$t1 + psrlq \$$n,$b1 + pxor $a0,$b0 + pxor $a1,$b1 + pand $mask,$b0 + pand $mask,$b1 + pxor $b0,$a0 + psllq \$$n,$b0 + pxor $b1,$a1 + psllq \$$n,$b1 + pxor $t0,$b0 + pxor $t1,$b1 +___ +} + +sub bitslice { +my @x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + movdqa 0x00($const),$t0 # .LBS0 + movdqa 0x10($const),$t1 # .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + movdqa 0x20($const),$t0 # .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +.text + +.extern asm_AES_encrypt +.extern asm_AES_decrypt + +.type _bsaes_encrypt8,\@abi-omnipotent +.align 64 +_bsaes_encrypt8: + lea .LBS0(%rip), $const # constants table + + movdqa ($key), @XMM[9] # round 0 key + lea 0x10($key), $key + movdqa 0x50($const), @XMM[8] # .LM0SR + pxor @XMM[9], @XMM[0] # xor with round0 key + pxor @XMM[9], @XMM[1] + pshufb @XMM[8], @XMM[0] + pxor @XMM[9], @XMM[2] + pshufb @XMM[8], @XMM[1] + pxor @XMM[9], @XMM[3] + pshufb @XMM[8], @XMM[2] + pxor @XMM[9], @XMM[4] + pshufb @XMM[8], @XMM[3] + pxor @XMM[9], @XMM[5] + pshufb @XMM[8], @XMM[4] + pxor @XMM[9], @XMM[6] + pshufb @XMM[8], @XMM[5] + pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[6] + pshufb @XMM[8], @XMM[7] +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + dec $rounds + jmp .Lenc_sbox +.align 16 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + dec $rounds + jl .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + movdqa 0x30($const), @XMM[8] # .LSR + jnz .Lenc_loop + movdqa 0x40($const), @XMM[8] # .LSRM0 + jmp .Lenc_loop +.align 16 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + movdqa ($key), @XMM[8] # last round key + pxor @XMM[8], @XMM[4] + pxor @XMM[8], @XMM[6] + pxor @XMM[8], @XMM[3] + pxor @XMM[8], @XMM[7] + pxor @XMM[8], @XMM[2] + pxor @XMM[8], @XMM[5] + pxor @XMM[8], @XMM[0] + pxor @XMM[8], @XMM[1] + ret +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,\@abi-omnipotent +.align 64 +_bsaes_decrypt8: + lea .LBS0(%rip), $const # constants table + + movdqa ($key), @XMM[9] # round 0 key + lea 0x10($key), $key + movdqa -0x30($const), @XMM[8] # .LM0ISR + pxor @XMM[9], @XMM[0] # xor with round0 key + pxor @XMM[9], @XMM[1] + pshufb @XMM[8], @XMM[0] + pxor @XMM[9], @XMM[2] + pshufb @XMM[8], @XMM[1] + pxor @XMM[9], @XMM[3] + pshufb @XMM[8], @XMM[2] + pxor @XMM[9], @XMM[4] + pshufb @XMM[8], @XMM[3] + pxor @XMM[9], @XMM[5] + pshufb @XMM[8], @XMM[4] + pxor @XMM[9], @XMM[6] + pshufb @XMM[8], @XMM[5] + pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[6] + pshufb @XMM[8], @XMM[7] +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + dec $rounds + jmp .Ldec_sbox +.align 16 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + dec $rounds + jl .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + movdqa -0x10($const), @XMM[8] # .LISR + jnz .Ldec_loop + movdqa -0x20($const), @XMM[8] # .LISRM0 + jmp .Ldec_loop +.align 16 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + movdqa ($key), @XMM[8] # last round key + pxor @XMM[8], @XMM[6] + pxor @XMM[8], @XMM[4] + pxor @XMM[8], @XMM[2] + pxor @XMM[8], @XMM[7] + pxor @XMM[8], @XMM[3] + pxor @XMM[8], @XMM[5] + pxor @XMM[8], @XMM[0] + pxor @XMM[8], @XMM[1] + ret +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + #&swapmove(@x[2,3],1,$t0,$t2,$t3); + movdqa @x[0], @x[2] + movdqa @x[1], @x[3] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + movdqa @x[0], @x[4] + movdqa @x[2], @x[6] + movdqa @x[1], @x[5] + movdqa @x[3], @x[7] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,\@abi-omnipotent +.align 16 +_bsaes_key_convert: + lea .Lmasks(%rip), $const + movdqu ($inp), %xmm7 # load round 0 key + lea 0x10($inp), $inp + movdqa 0x00($const), %xmm0 # 0x01... + movdqa 0x10($const), %xmm1 # 0x02... + movdqa 0x20($const), %xmm2 # 0x04... + movdqa 0x30($const), %xmm3 # 0x08... + movdqa 0x40($const), %xmm4 # .LM0 + pcmpeqd %xmm5, %xmm5 # .LNOT + + movdqu ($inp), %xmm6 # load round 1 key + movdqa %xmm7, ($out) # save round 0 key + lea 0x10($out), $out + dec $rounds + jmp .Lkey_loop +.align 16 +.Lkey_loop: + pshufb %xmm4, %xmm6 # .LM0 + + movdqa %xmm0, %xmm8 + movdqa %xmm1, %xmm9 + + pand %xmm6, %xmm8 + pand %xmm6, %xmm9 + movdqa %xmm2, %xmm10 + pcmpeqb %xmm0, %xmm8 + psllq \$4, %xmm0 # 0x10... + movdqa %xmm3, %xmm11 + pcmpeqb %xmm1, %xmm9 + psllq \$4, %xmm1 # 0x20... + + pand %xmm6, %xmm10 + pand %xmm6, %xmm11 + movdqa %xmm0, %xmm12 + pcmpeqb %xmm2, %xmm10 + psllq \$4, %xmm2 # 0x40... + movdqa %xmm1, %xmm13 + pcmpeqb %xmm3, %xmm11 + psllq \$4, %xmm3 # 0x80... + + movdqa %xmm2, %xmm14 + movdqa %xmm3, %xmm15 + pxor %xmm5, %xmm8 # "pnot" + pxor %xmm5, %xmm9 + + pand %xmm6, %xmm12 + pand %xmm6, %xmm13 + movdqa %xmm8, 0x00($out) # write bit-sliced round key + pcmpeqb %xmm0, %xmm12 + psrlq \$4, %xmm0 # 0x01... + movdqa %xmm9, 0x10($out) + pcmpeqb %xmm1, %xmm13 + psrlq \$4, %xmm1 # 0x02... + lea 0x10($inp), $inp + + pand %xmm6, %xmm14 + pand %xmm6, %xmm15 + movdqa %xmm10, 0x20($out) + pcmpeqb %xmm2, %xmm14 + psrlq \$4, %xmm2 # 0x04... + movdqa %xmm11, 0x30($out) + pcmpeqb %xmm3, %xmm15 + psrlq \$4, %xmm3 # 0x08... + movdqu ($inp), %xmm6 # load next round key + + pxor %xmm5, %xmm13 # "pnot" + pxor %xmm5, %xmm14 + movdqa %xmm12, 0x40($out) + movdqa %xmm13, 0x50($out) + movdqa %xmm14, 0x60($out) + movdqa %xmm15, 0x70($out) + lea 0x80($out),$out + dec $rounds + jnz .Lkey_loop + + movdqa 0x50($const), %xmm7 # .L63 + #movdqa %xmm6, ($out) # don't save last round key + ret +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0 && !$win64) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,\@function,2 +.align 16 +bsaes_enc_key_convert: + mov 240($inp),%r10d # pass rounds + mov $inp,%rcx # pass key + mov $out,%rax # pass key schedule + call _bsaes_key_convert + pxor %xmm6,%xmm7 # fix up last round key + movdqa %xmm7,(%rax) # save last round key + ret +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,\@function,4 +.align 16 +bsaes_encrypt_128: +.Lenc128_loop: + movdqu 0x00($inp), @XMM[0] # load input + movdqu 0x10($inp), @XMM[1] + movdqu 0x20($inp), @XMM[2] + movdqu 0x30($inp), @XMM[3] + movdqu 0x40($inp), @XMM[4] + movdqu 0x50($inp), @XMM[5] + movdqu 0x60($inp), @XMM[6] + movdqu 0x70($inp), @XMM[7] + mov $key, %rax # pass the $key + lea 0x80($inp), $inp + mov \$10,%r10d + + call _bsaes_encrypt8 + + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[2], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + sub \$0x80,$len + ja .Lenc128_loop + ret +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,\@function,2 +.align 16 +bsaes_dec_key_convert: + mov 240($inp),%r10d # pass rounds + mov $inp,%rcx # pass key + mov $out,%rax # pass key schedule + call _bsaes_key_convert + pxor ($out),%xmm7 # fix up round 0 key + movdqa %xmm6,(%rax) # save last round key + movdqa %xmm7,($out) + ret +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,\@function,4 +.align 16 +bsaes_decrypt_128: +.Ldec128_loop: + movdqu 0x00($inp), @XMM[0] # load input + movdqu 0x10($inp), @XMM[1] + movdqu 0x20($inp), @XMM[2] + movdqu 0x30($inp), @XMM[3] + movdqu 0x40($inp), @XMM[4] + movdqu 0x50($inp), @XMM[5] + movdqu 0x60($inp), @XMM[6] + movdqu 0x70($inp), @XMM[7] + mov $key, %rax # pass the $key + lea 0x80($inp), $inp + mov \$10,%r10d + + call _bsaes_decrypt8 + + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + sub \$0x80,$len + ja .Ldec128_loop + ret +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +###################################################################### +# +# OpenSSL interface +# +my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d") + : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d"); +my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15"); + +if ($ecb) { +$code.=<<___; +.globl bsaes_ecb_encrypt_blocks +.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent +.align 16 +bsaes_ecb_encrypt_blocks: + mov %rsp, %rax +.Lecb_enc_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp),%rsp +___ +$code.=<<___ if ($win64); + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lecb_enc_body: +___ +$code.=<<___; + mov %rsp,%rbp # backup %rsp + mov 240($arg4),%eax # rounds + mov $arg1,$inp # backup arguments + mov $arg2,$out + mov $arg3,$len + mov $arg4,$key + cmp \$8,$arg3 + jb .Lecb_enc_short + + mov %eax,%ebx # backup rounds + shl \$7,%rax # 128 bytes per inner round key + sub \$`128-32`,%rax # size of bit-sliced key schedule + sub %rax,%rsp + mov %rsp,%rax # pass key schedule + mov $key,%rcx # pass key + mov %ebx,%r10d # pass rounds + call _bsaes_key_convert + pxor %xmm6,%xmm7 # fix up last round key + movdqa %xmm7,(%rax) # save last round key + + sub \$8,$len +.Lecb_enc_loop: + movdqu 0x00($inp), @XMM[0] # load input + movdqu 0x10($inp), @XMM[1] + movdqu 0x20($inp), @XMM[2] + movdqu 0x30($inp), @XMM[3] + movdqu 0x40($inp), @XMM[4] + movdqu 0x50($inp), @XMM[5] + mov %rsp, %rax # pass key schedule + movdqu 0x60($inp), @XMM[6] + mov %ebx,%r10d # pass rounds + movdqu 0x70($inp), @XMM[7] + lea 0x80($inp), $inp + + call _bsaes_encrypt8 + + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[2], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + sub \$8,$len + jnc .Lecb_enc_loop + + add \$8,$len + jz .Lecb_enc_done + + movdqu 0x00($inp), @XMM[0] # load input + mov %rsp, %rax # pass key schedule + mov %ebx,%r10d # pass rounds + cmp \$2,$len + jb .Lecb_enc_one + movdqu 0x10($inp), @XMM[1] + je .Lecb_enc_two + movdqu 0x20($inp), @XMM[2] + cmp \$4,$len + jb .Lecb_enc_three + movdqu 0x30($inp), @XMM[3] + je .Lecb_enc_four + movdqu 0x40($inp), @XMM[4] + cmp \$6,$len + jb .Lecb_enc_five + movdqu 0x50($inp), @XMM[5] + je .Lecb_enc_six + movdqu 0x60($inp), @XMM[6] + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[2], 0x60($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_six: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + movdqu @XMM[7], 0x50($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_five: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_four: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_three: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_two: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_one: + call _bsaes_encrypt8 + movdqu @XMM[0], 0x00($out) # write output + jmp .Lecb_enc_done +.align 16 +.Lecb_enc_short: + lea ($inp), $arg1 + lea ($out), $arg2 + lea ($key), $arg3 + call asm_AES_encrypt + lea 16($inp), $inp + lea 16($out), $out + dec $len + jnz .Lecb_enc_short + +.Lecb_enc_done: + lea (%rsp),%rax + pxor %xmm0, %xmm0 +.Lecb_enc_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + jb .Lecb_enc_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lecb_enc_epilogue: + ret +.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks + +.globl bsaes_ecb_decrypt_blocks +.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent +.align 16 +bsaes_ecb_decrypt_blocks: + mov %rsp, %rax +.Lecb_dec_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp),%rsp +___ +$code.=<<___ if ($win64); + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lecb_dec_body: +___ +$code.=<<___; + mov %rsp,%rbp # backup %rsp + mov 240($arg4),%eax # rounds + mov $arg1,$inp # backup arguments + mov $arg2,$out + mov $arg3,$len + mov $arg4,$key + cmp \$8,$arg3 + jb .Lecb_dec_short + + mov %eax,%ebx # backup rounds + shl \$7,%rax # 128 bytes per inner round key + sub \$`128-32`,%rax # size of bit-sliced key schedule + sub %rax,%rsp + mov %rsp,%rax # pass key schedule + mov $key,%rcx # pass key + mov %ebx,%r10d # pass rounds + call _bsaes_key_convert + pxor (%rsp),%xmm7 # fix up 0 round key + movdqa %xmm6,(%rax) # save last round key + movdqa %xmm7,(%rsp) + + sub \$8,$len +.Lecb_dec_loop: + movdqu 0x00($inp), @XMM[0] # load input + movdqu 0x10($inp), @XMM[1] + movdqu 0x20($inp), @XMM[2] + movdqu 0x30($inp), @XMM[3] + movdqu 0x40($inp), @XMM[4] + movdqu 0x50($inp), @XMM[5] + mov %rsp, %rax # pass key schedule + movdqu 0x60($inp), @XMM[6] + mov %ebx,%r10d # pass rounds + movdqu 0x70($inp), @XMM[7] + lea 0x80($inp), $inp + + call _bsaes_decrypt8 + + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + sub \$8,$len + jnc .Lecb_dec_loop + + add \$8,$len + jz .Lecb_dec_done + + movdqu 0x00($inp), @XMM[0] # load input + mov %rsp, %rax # pass key schedule + mov %ebx,%r10d # pass rounds + cmp \$2,$len + jb .Lecb_dec_one + movdqu 0x10($inp), @XMM[1] + je .Lecb_dec_two + movdqu 0x20($inp), @XMM[2] + cmp \$4,$len + jb .Lecb_dec_three + movdqu 0x30($inp), @XMM[3] + je .Lecb_dec_four + movdqu 0x40($inp), @XMM[4] + cmp \$6,$len + jb .Lecb_dec_five + movdqu 0x50($inp), @XMM[5] + je .Lecb_dec_six + movdqu 0x60($inp), @XMM[6] + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_six: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_five: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_four: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_three: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_two: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_one: + call _bsaes_decrypt8 + movdqu @XMM[0], 0x00($out) # write output + jmp .Lecb_dec_done +.align 16 +.Lecb_dec_short: + lea ($inp), $arg1 + lea ($out), $arg2 + lea ($key), $arg3 + call asm_AES_decrypt + lea 16($inp), $inp + lea 16($out), $out + dec $len + jnz .Lecb_dec_short + +.Lecb_dec_done: + lea (%rsp),%rax + pxor %xmm0, %xmm0 +.Lecb_dec_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + jb .Lecb_dec_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lecb_dec_epilogue: + ret +.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks +___ +} +$code.=<<___; +.extern asm_AES_cbc_encrypt +.globl bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,\@abi-omnipotent +.align 16 +bsaes_cbc_encrypt: +___ +$code.=<<___ if ($win64); + mov 48(%rsp),$arg6 # pull direction flag +___ +$code.=<<___; + cmp \$0,$arg6 + jne asm_AES_cbc_encrypt + cmp \$128,$arg3 + jb asm_AES_cbc_encrypt + + mov %rsp, %rax +.Lcbc_dec_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp), %rsp +___ +$code.=<<___ if ($win64); + mov 0xa0(%rsp),$arg5 # pull ivp + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lcbc_dec_body: +___ +$code.=<<___; + mov %rsp, %rbp # backup %rsp + mov 240($arg4), %eax # rounds + mov $arg1, $inp # backup arguments + mov $arg2, $out + mov $arg3, $len + mov $arg4, $key + mov $arg5, %rbx + shr \$4, $len # bytes to blocks + + mov %eax, %edx # rounds + shl \$7, %rax # 128 bytes per inner round key + sub \$`128-32`, %rax # size of bit-sliced key schedule + sub %rax, %rsp + + mov %rsp, %rax # pass key schedule + mov $key, %rcx # pass key + mov %edx, %r10d # pass rounds + call _bsaes_key_convert + pxor (%rsp),%xmm7 # fix up 0 round key + movdqa %xmm6,(%rax) # save last round key + movdqa %xmm7,(%rsp) + + movdqu (%rbx), @XMM[15] # load IV + sub \$8,$len +.Lcbc_dec_loop: + movdqu 0x00($inp), @XMM[0] # load input + movdqu 0x10($inp), @XMM[1] + movdqu 0x20($inp), @XMM[2] + movdqu 0x30($inp), @XMM[3] + movdqu 0x40($inp), @XMM[4] + movdqu 0x50($inp), @XMM[5] + mov %rsp, %rax # pass key schedule + movdqu 0x60($inp), @XMM[6] + mov %edx,%r10d # pass rounds + movdqu 0x70($inp), @XMM[7] + movdqa @XMM[15], 0x20(%rbp) # put aside IV + + call _bsaes_decrypt8 + + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[10] + pxor @XMM[9], @XMM[6] + movdqu 0x30($inp), @XMM[11] + pxor @XMM[10], @XMM[4] + movdqu 0x40($inp), @XMM[12] + pxor @XMM[11], @XMM[2] + movdqu 0x50($inp), @XMM[13] + pxor @XMM[12], @XMM[7] + movdqu 0x60($inp), @XMM[14] + pxor @XMM[13], @XMM[3] + movdqu 0x70($inp), @XMM[15] # IV + pxor @XMM[14], @XMM[5] + movdqu @XMM[0], 0x00($out) # write output + lea 0x80($inp), $inp + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + sub \$8,$len + jnc .Lcbc_dec_loop + + add \$8,$len + jz .Lcbc_dec_done + + movdqu 0x00($inp), @XMM[0] # load input + mov %rsp, %rax # pass key schedule + mov %edx, %r10d # pass rounds + cmp \$2,$len + jb .Lcbc_dec_one + movdqu 0x10($inp), @XMM[1] + je .Lcbc_dec_two + movdqu 0x20($inp), @XMM[2] + cmp \$4,$len + jb .Lcbc_dec_three + movdqu 0x30($inp), @XMM[3] + je .Lcbc_dec_four + movdqu 0x40($inp), @XMM[4] + cmp \$6,$len + jb .Lcbc_dec_five + movdqu 0x50($inp), @XMM[5] + je .Lcbc_dec_six + movdqu 0x60($inp), @XMM[6] + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[10] + pxor @XMM[9], @XMM[6] + movdqu 0x30($inp), @XMM[11] + pxor @XMM[10], @XMM[4] + movdqu 0x40($inp), @XMM[12] + pxor @XMM[11], @XMM[2] + movdqu 0x50($inp), @XMM[13] + pxor @XMM[12], @XMM[7] + movdqu 0x60($inp), @XMM[15] # IV + pxor @XMM[13], @XMM[3] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[10] + pxor @XMM[9], @XMM[6] + movdqu 0x30($inp), @XMM[11] + pxor @XMM[10], @XMM[4] + movdqu 0x40($inp), @XMM[12] + pxor @XMM[11], @XMM[2] + movdqu 0x50($inp), @XMM[15] # IV + pxor @XMM[12], @XMM[7] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_five: + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[10] + pxor @XMM[9], @XMM[6] + movdqu 0x30($inp), @XMM[11] + pxor @XMM[10], @XMM[4] + movdqu 0x40($inp), @XMM[15] # IV + pxor @XMM[11], @XMM[2] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[10] + pxor @XMM[9], @XMM[6] + movdqu 0x30($inp), @XMM[15] # IV + pxor @XMM[10], @XMM[4] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[9] + pxor @XMM[8], @XMM[1] + movdqu 0x20($inp), @XMM[15] # IV + pxor @XMM[9], @XMM[6] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[8] # re-load input + movdqu 0x10($inp), @XMM[15] # IV + pxor @XMM[8], @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + lea ($inp), $arg1 + lea 0x20(%rbp), $arg2 # buffer output + lea ($key), $arg3 + call asm_AES_decrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[15] # ^= IV + movdqu @XMM[15], ($out) # write output + movdqa @XMM[0], @XMM[15] # IV + +.Lcbc_dec_done: + movdqu @XMM[15], (%rbx) # return IV + lea (%rsp), %rax + pxor %xmm0, %xmm0 +.Lcbc_dec_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + ja .Lcbc_dec_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lcbc_dec_epilogue: + ret +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent +.align 16 +bsaes_ctr32_encrypt_blocks: + mov %rsp, %rax +.Lctr_enc_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp), %rsp +___ +$code.=<<___ if ($win64); + mov 0xa0(%rsp),$arg5 # pull ivp + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lctr_enc_body: +___ +$code.=<<___; + mov %rsp, %rbp # backup %rsp + movdqu ($arg5), %xmm0 # load counter + mov 240($arg4), %eax # rounds + mov $arg1, $inp # backup arguments + mov $arg2, $out + mov $arg3, $len + mov $arg4, $key + movdqa %xmm0, 0x20(%rbp) # copy counter + cmp \$8, $arg3 + jb .Lctr_enc_short + + mov %eax, %ebx # rounds + shl \$7, %rax # 128 bytes per inner round key + sub \$`128-32`, %rax # size of bit-sliced key schedule + sub %rax, %rsp + + mov %rsp, %rax # pass key schedule + mov $key, %rcx # pass key + mov %ebx, %r10d # pass rounds + call _bsaes_key_convert + pxor %xmm6,%xmm7 # fix up last round key + movdqa %xmm7,(%rax) # save last round key + + movdqa (%rsp), @XMM[9] # load round0 key + lea .LADD1(%rip), %r11 + movdqa 0x20(%rbp), @XMM[0] # counter copy + movdqa -0x20(%r11), @XMM[8] # .LSWPUP + pshufb @XMM[8], @XMM[9] # byte swap upper part + pshufb @XMM[8], @XMM[0] + movdqa @XMM[9], (%rsp) # save adjusted round0 key + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa @XMM[0], 0x20(%rbp) # save counter + movdqa @XMM[0], @XMM[1] # prepare 8 counter values + movdqa @XMM[0], @XMM[2] + paddd 0x00(%r11), @XMM[1] # .LADD1 + movdqa @XMM[0], @XMM[3] + paddd 0x10(%r11), @XMM[2] # .LADD2 + movdqa @XMM[0], @XMM[4] + paddd 0x20(%r11), @XMM[3] # .LADD3 + movdqa @XMM[0], @XMM[5] + paddd 0x30(%r11), @XMM[4] # .LADD4 + movdqa @XMM[0], @XMM[6] + paddd 0x40(%r11), @XMM[5] # .LADD5 + movdqa @XMM[0], @XMM[7] + paddd 0x50(%r11), @XMM[6] # .LADD6 + paddd 0x60(%r11), @XMM[7] # .LADD7 + + # Borrow prologue from _bsaes_encrypt8 to use the opportunity + # to flip byte order in 32-bit counter + movdqa (%rsp), @XMM[9] # round 0 key + lea 0x10(%rsp), %rax # pass key schedule + movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR + pxor @XMM[9], @XMM[0] # xor with round0 key + pxor @XMM[9], @XMM[1] + pshufb @XMM[8], @XMM[0] + pxor @XMM[9], @XMM[2] + pshufb @XMM[8], @XMM[1] + pxor @XMM[9], @XMM[3] + pshufb @XMM[8], @XMM[2] + pxor @XMM[9], @XMM[4] + pshufb @XMM[8], @XMM[3] + pxor @XMM[9], @XMM[5] + pshufb @XMM[8], @XMM[4] + pxor @XMM[9], @XMM[6] + pshufb @XMM[8], @XMM[5] + pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[6] + lea .LBS0(%rip), %r11 # constants table + pshufb @XMM[8], @XMM[7] + mov %ebx,%r10d # pass rounds + + call _bsaes_encrypt8_bitslice + + sub \$8,$len + jc .Lctr_enc_loop_done + + movdqu 0x00($inp), @XMM[8] # load input + movdqu 0x10($inp), @XMM[9] + movdqu 0x20($inp), @XMM[10] + movdqu 0x30($inp), @XMM[11] + movdqu 0x40($inp), @XMM[12] + movdqu 0x50($inp), @XMM[13] + movdqu 0x60($inp), @XMM[14] + movdqu 0x70($inp), @XMM[15] + lea 0x80($inp),$inp + pxor @XMM[0], @XMM[8] + movdqa 0x20(%rbp), @XMM[0] # load counter + pxor @XMM[9], @XMM[1] + movdqu @XMM[8], 0x00($out) # write output + pxor @XMM[10], @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor @XMM[11], @XMM[6] + movdqu @XMM[4], 0x20($out) + pxor @XMM[12], @XMM[3] + movdqu @XMM[6], 0x30($out) + pxor @XMM[13], @XMM[7] + movdqu @XMM[3], 0x40($out) + pxor @XMM[14], @XMM[2] + movdqu @XMM[7], 0x50($out) + pxor @XMM[15], @XMM[5] + movdqu @XMM[2], 0x60($out) + lea .LADD1(%rip), %r11 + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + paddd 0x70(%r11), @XMM[0] # .LADD8 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + add \$8, $len + movdqu 0x00($inp), @XMM[8] # load input + pxor @XMM[8], @XMM[0] + movdqu @XMM[0], 0x00($out) # write output + cmp \$2,$len + jb .Lctr_enc_done + movdqu 0x10($inp), @XMM[9] + pxor @XMM[9], @XMM[1] + movdqu @XMM[1], 0x10($out) + je .Lctr_enc_done + movdqu 0x20($inp), @XMM[10] + pxor @XMM[10], @XMM[4] + movdqu @XMM[4], 0x20($out) + cmp \$4,$len + jb .Lctr_enc_done + movdqu 0x30($inp), @XMM[11] + pxor @XMM[11], @XMM[6] + movdqu @XMM[6], 0x30($out) + je .Lctr_enc_done + movdqu 0x40($inp), @XMM[12] + pxor @XMM[12], @XMM[3] + movdqu @XMM[3], 0x40($out) + cmp \$6,$len + jb .Lctr_enc_done + movdqu 0x50($inp), @XMM[13] + pxor @XMM[13], @XMM[7] + movdqu @XMM[7], 0x50($out) + je .Lctr_enc_done + movdqu 0x60($inp), @XMM[14] + pxor @XMM[14], @XMM[2] + movdqu @XMM[2], 0x60($out) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + lea 0x20(%rbp), $arg1 + lea 0x30(%rbp), $arg2 + lea ($key), $arg3 + call asm_AES_encrypt + movdqu ($inp), @XMM[1] + lea 16($inp), $inp + mov 0x2c(%rbp), %eax # load 32-bit counter + bswap %eax + pxor 0x30(%rbp), @XMM[1] + inc %eax # increment + movdqu @XMM[1], ($out) + bswap %eax + lea 16($out), $out + mov %eax, 0x2c(%rsp) # save 32-bit counter + dec $len + jnz .Lctr_enc_short + +.Lctr_enc_done: + lea (%rsp), %rax + pxor %xmm0, %xmm0 +.Lctr_enc_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + ja .Lctr_enc_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lctr_enc_epilogue: + ret +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($twmask,$twres,$twtmp)=@XMM[13..15]; +$arg6=~s/d$//; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,\@abi-omnipotent +.align 16 +bsaes_xts_encrypt: + mov %rsp, %rax +.Lxts_enc_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp), %rsp +___ +$code.=<<___ if ($win64); + mov 0xa0(%rsp),$arg5 # pull key2 + mov 0xa8(%rsp),$arg6 # pull ivp + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lxts_enc_body: +___ +$code.=<<___; + mov %rsp, %rbp # backup %rsp + mov $arg1, $inp # backup arguments + mov $arg2, $out + mov $arg3, $len + mov $arg4, $key + + lea ($arg6), $arg1 + lea 0x20(%rbp), $arg2 + lea ($arg5), $arg3 + call asm_AES_encrypt # generate initial tweak + + mov 240($key), %eax # rounds + mov $len, %rbx # backup $len + + mov %eax, %edx # rounds + shl \$7, %rax # 128 bytes per inner round key + sub \$`128-32`, %rax # size of bit-sliced key schedule + sub %rax, %rsp + + mov %rsp, %rax # pass key schedule + mov $key, %rcx # pass key + mov %edx, %r10d # pass rounds + call _bsaes_key_convert + pxor %xmm6, %xmm7 # fix up last round key + movdqa %xmm7, (%rax) # save last round key + + and \$-16, $len + sub \$0x80, %rsp # place for tweak[8] + movdqa 0x20(%rbp), @XMM[7] # initial tweak + + pxor $twtmp, $twtmp + movdqa .Lxts_magic(%rip), $twmask + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + + sub \$0x80, $len + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: +___ + for ($i=0;$i<7;$i++) { + $code.=<<___; + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + movdqa @XMM[7], @XMM[$i] + movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] +___ + $code.=<<___ if ($i>=1); + movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] +___ + $code.=<<___ if ($i>=2); + pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] +___ + } +$code.=<<___; + movdqu 0x60($inp), @XMM[8+6] + pxor @XMM[8+5], @XMM[5] + movdqu 0x70($inp), @XMM[8+7] + lea 0x80($inp), $inp + movdqa @XMM[7], 0x70(%rsp) + pxor @XMM[8+6], @XMM[6] + lea 0x80(%rsp), %rax # pass key schedule + pxor @XMM[8+7], @XMM[7] + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[6] + movdqu @XMM[4], 0x20($out) + pxor 0x40(%rsp), @XMM[3] + movdqu @XMM[6], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[3], 0x40($out) + pxor 0x60(%rsp), @XMM[2] + movdqu @XMM[7], 0x50($out) + pxor 0x70(%rsp), @XMM[5] + movdqu @XMM[2], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + + movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak + pxor $twtmp, $twtmp + movdqa .Lxts_magic(%rip), $twmask + pcmpgtd @XMM[7], $twtmp + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] + + sub \$0x80,$len + jnc .Lxts_enc_loop + +.Lxts_enc_short: + add \$0x80, $len + jz .Lxts_enc_done +___ + for ($i=0;$i<7;$i++) { + $code.=<<___; + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + movdqa @XMM[7], @XMM[$i] + movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] +___ + $code.=<<___ if ($i>=1); + movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] + cmp \$`0x10*$i`,$len + je .Lxts_enc_$i +___ + $code.=<<___ if ($i>=2); + pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] +___ + } +$code.=<<___; + movdqu 0x60($inp), @XMM[8+6] + pxor @XMM[8+5], @XMM[5] + movdqa @XMM[7], 0x70(%rsp) + lea 0x70($inp), $inp + pxor @XMM[8+6], @XMM[6] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[6] + movdqu @XMM[4], 0x20($out) + pxor 0x40(%rsp), @XMM[3] + movdqu @XMM[6], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[3], 0x40($out) + pxor 0x60(%rsp), @XMM[2] + movdqu @XMM[7], 0x50($out) + movdqu @XMM[2], 0x60($out) + lea 0x70($out), $out + + movdqa 0x70(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor @XMM[8+4], @XMM[4] + lea 0x60($inp), $inp + pxor @XMM[8+5], @XMM[5] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[6] + movdqu @XMM[4], 0x20($out) + pxor 0x40(%rsp), @XMM[3] + movdqu @XMM[6], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[3], 0x40($out) + movdqu @XMM[7], 0x50($out) + lea 0x60($out), $out + + movdqa 0x60(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor @XMM[8+3], @XMM[3] + lea 0x50($inp), $inp + pxor @XMM[8+4], @XMM[4] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[6] + movdqu @XMM[4], 0x20($out) + pxor 0x40(%rsp), @XMM[3] + movdqu @XMM[6], 0x30($out) + movdqu @XMM[3], 0x40($out) + lea 0x50($out), $out + + movdqa 0x50(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor @XMM[8+2], @XMM[2] + lea 0x40($inp), $inp + pxor @XMM[8+3], @XMM[3] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[6] + movdqu @XMM[4], 0x20($out) + movdqu @XMM[6], 0x30($out) + lea 0x40($out), $out + + movdqa 0x40(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor @XMM[8+1], @XMM[1] + lea 0x30($inp), $inp + pxor @XMM[8+2], @XMM[2] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[4] + movdqu @XMM[1], 0x10($out) + movdqu @XMM[4], 0x20($out) + lea 0x30($out), $out + + movdqa 0x30(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor @XMM[8+0], @XMM[0] + lea 0x20($inp), $inp + pxor @XMM[8+1], @XMM[1] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_encrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + lea 0x20($out), $out + + movdqa 0x20(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor @XMM[0], @XMM[8] + lea 0x10($inp), $inp + movdqa @XMM[8], 0x20(%rbp) + lea 0x20(%rbp), $arg1 + lea 0x20(%rbp), $arg2 + lea ($key), $arg3 + call asm_AES_encrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[0] # ^= tweak[] + #pxor @XMM[8], @XMM[0] + #lea 0x80(%rsp), %rax # pass key schedule + #mov %edx, %r10d # pass rounds + #call _bsaes_encrypt8 + #pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + movdqu @XMM[0], 0x00($out) # write output + lea 0x10($out), $out + + movdqa 0x10(%rsp), @XMM[7] # next iteration tweak + +.Lxts_enc_done: + and \$15, %ebx + jz .Lxts_enc_ret + mov $out, %rdx + +.Lxts_enc_steal: + movzb ($inp), %eax + movzb -16(%rdx), %ecx + lea 1($inp), $inp + mov %al, -16(%rdx) + mov %cl, 0(%rdx) + lea 1(%rdx), %rdx + sub \$1,%ebx + jnz .Lxts_enc_steal + + movdqu -16($out), @XMM[0] + lea 0x20(%rbp), $arg1 + pxor @XMM[7], @XMM[0] + lea 0x20(%rbp), $arg2 + movdqa @XMM[0], 0x20(%rbp) + lea ($key), $arg3 + call asm_AES_encrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[7] + movdqu @XMM[7], -16($out) + +.Lxts_enc_ret: + lea (%rsp), %rax + pxor %xmm0, %xmm0 +.Lxts_enc_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + ja .Lxts_enc_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lxts_enc_epilogue: + ret +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,\@abi-omnipotent +.align 16 +bsaes_xts_decrypt: + mov %rsp, %rax +.Lxts_dec_prologue: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + lea -0x48(%rsp), %rsp +___ +$code.=<<___ if ($win64); + mov 0xa0(%rsp),$arg5 # pull key2 + mov 0xa8(%rsp),$arg6 # pull ivp + lea -0xa0(%rsp), %rsp + movaps %xmm6, 0x40(%rsp) + movaps %xmm7, 0x50(%rsp) + movaps %xmm8, 0x60(%rsp) + movaps %xmm9, 0x70(%rsp) + movaps %xmm10, 0x80(%rsp) + movaps %xmm11, 0x90(%rsp) + movaps %xmm12, 0xa0(%rsp) + movaps %xmm13, 0xb0(%rsp) + movaps %xmm14, 0xc0(%rsp) + movaps %xmm15, 0xd0(%rsp) +.Lxts_dec_body: +___ +$code.=<<___; + mov %rsp, %rbp # backup %rsp + mov $arg1, $inp # backup arguments + mov $arg2, $out + mov $arg3, $len + mov $arg4, $key + + lea ($arg6), $arg1 + lea 0x20(%rbp), $arg2 + lea ($arg5), $arg3 + call asm_AES_encrypt # generate initial tweak + + mov 240($key), %eax # rounds + mov $len, %rbx # backup $len + + mov %eax, %edx # rounds + shl \$7, %rax # 128 bytes per inner round key + sub \$`128-32`, %rax # size of bit-sliced key schedule + sub %rax, %rsp + + mov %rsp, %rax # pass key schedule + mov $key, %rcx # pass key + mov %edx, %r10d # pass rounds + call _bsaes_key_convert + pxor (%rsp), %xmm7 # fix up round 0 key + movdqa %xmm6, (%rax) # save last round key + movdqa %xmm7, (%rsp) + + xor %eax, %eax # if ($len%16) len-=16; + and \$-16, $len + test \$15, %ebx + setnz %al + shl \$4, %rax + sub %rax, $len + + sub \$0x80, %rsp # place for tweak[8] + movdqa 0x20(%rbp), @XMM[7] # initial tweak + + pxor $twtmp, $twtmp + movdqa .Lxts_magic(%rip), $twmask + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + + sub \$0x80, $len + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: +___ + for ($i=0;$i<7;$i++) { + $code.=<<___; + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + movdqa @XMM[7], @XMM[$i] + movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] +___ + $code.=<<___ if ($i>=1); + movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] +___ + $code.=<<___ if ($i>=2); + pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] +___ + } +$code.=<<___; + movdqu 0x60($inp), @XMM[8+6] + pxor @XMM[8+5], @XMM[5] + movdqu 0x70($inp), @XMM[8+7] + lea 0x80($inp), $inp + movdqa @XMM[7], 0x70(%rsp) + pxor @XMM[8+6], @XMM[6] + lea 0x80(%rsp), %rax # pass key schedule + pxor @XMM[8+7], @XMM[7] + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[4] + movdqu @XMM[6], 0x20($out) + pxor 0x40(%rsp), @XMM[2] + movdqu @XMM[4], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[2], 0x40($out) + pxor 0x60(%rsp), @XMM[3] + movdqu @XMM[7], 0x50($out) + pxor 0x70(%rsp), @XMM[5] + movdqu @XMM[3], 0x60($out) + movdqu @XMM[5], 0x70($out) + lea 0x80($out), $out + + movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak + pxor $twtmp, $twtmp + movdqa .Lxts_magic(%rip), $twmask + pcmpgtd @XMM[7], $twtmp + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] + + sub \$0x80,$len + jnc .Lxts_dec_loop + +.Lxts_dec_short: + add \$0x80, $len + jz .Lxts_dec_done +___ + for ($i=0;$i<7;$i++) { + $code.=<<___; + pshufd \$0x13, $twtmp, $twres + pxor $twtmp, $twtmp + movdqa @XMM[7], @XMM[$i] + movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + pcmpgtd @XMM[7], $twtmp # broadcast upper bits + pxor $twres, @XMM[7] +___ + $code.=<<___ if ($i>=1); + movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] + cmp \$`0x10*$i`,$len + je .Lxts_dec_$i +___ + $code.=<<___ if ($i>=2); + pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] +___ + } +$code.=<<___; + movdqu 0x60($inp), @XMM[8+6] + pxor @XMM[8+5], @XMM[5] + movdqa @XMM[7], 0x70(%rsp) + lea 0x70($inp), $inp + pxor @XMM[8+6], @XMM[6] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[4] + movdqu @XMM[6], 0x20($out) + pxor 0x40(%rsp), @XMM[2] + movdqu @XMM[4], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[2], 0x40($out) + pxor 0x60(%rsp), @XMM[3] + movdqu @XMM[7], 0x50($out) + movdqu @XMM[3], 0x60($out) + lea 0x70($out), $out + + movdqa 0x70(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor @XMM[8+4], @XMM[4] + lea 0x60($inp), $inp + pxor @XMM[8+5], @XMM[5] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[4] + movdqu @XMM[6], 0x20($out) + pxor 0x40(%rsp), @XMM[2] + movdqu @XMM[4], 0x30($out) + pxor 0x50(%rsp), @XMM[7] + movdqu @XMM[2], 0x40($out) + movdqu @XMM[7], 0x50($out) + lea 0x60($out), $out + + movdqa 0x60(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor @XMM[8+3], @XMM[3] + lea 0x50($inp), $inp + pxor @XMM[8+4], @XMM[4] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[4] + movdqu @XMM[6], 0x20($out) + pxor 0x40(%rsp), @XMM[2] + movdqu @XMM[4], 0x30($out) + movdqu @XMM[2], 0x40($out) + lea 0x50($out), $out + + movdqa 0x50(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor @XMM[8+2], @XMM[2] + lea 0x40($inp), $inp + pxor @XMM[8+3], @XMM[3] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + pxor 0x30(%rsp), @XMM[4] + movdqu @XMM[6], 0x20($out) + movdqu @XMM[4], 0x30($out) + lea 0x40($out), $out + + movdqa 0x40(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor @XMM[8+1], @XMM[1] + lea 0x30($inp), $inp + pxor @XMM[8+2], @XMM[2] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + pxor 0x20(%rsp), @XMM[6] + movdqu @XMM[1], 0x10($out) + movdqu @XMM[6], 0x20($out) + lea 0x30($out), $out + + movdqa 0x30(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor @XMM[8+0], @XMM[0] + lea 0x20($inp), $inp + pxor @XMM[8+1], @XMM[1] + lea 0x80(%rsp), %rax # pass key schedule + mov %edx, %r10d # pass rounds + + call _bsaes_decrypt8 + + pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + pxor 0x10(%rsp), @XMM[1] + movdqu @XMM[0], 0x00($out) # write output + movdqu @XMM[1], 0x10($out) + lea 0x20($out), $out + + movdqa 0x20(%rsp), @XMM[7] # next iteration tweak + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor @XMM[0], @XMM[8] + lea 0x10($inp), $inp + movdqa @XMM[8], 0x20(%rbp) + lea 0x20(%rbp), $arg1 + lea 0x20(%rbp), $arg2 + lea ($key), $arg3 + call asm_AES_decrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[0] # ^= tweak[] + #pxor @XMM[8], @XMM[0] + #lea 0x80(%rsp), %rax # pass key schedule + #mov %edx, %r10d # pass rounds + #call _bsaes_decrypt8 + #pxor 0x00(%rsp), @XMM[0] # ^= tweak[] + movdqu @XMM[0], 0x00($out) # write output + lea 0x10($out), $out + + movdqa 0x10(%rsp), @XMM[7] # next iteration tweak + +.Lxts_dec_done: + and \$15, %ebx + jz .Lxts_dec_ret + + pxor $twtmp, $twtmp + movdqa .Lxts_magic(%rip), $twmask + pcmpgtd @XMM[7], $twtmp + pshufd \$0x13, $twtmp, $twres + movdqa @XMM[7], @XMM[6] + paddq @XMM[7], @XMM[7] # psllq 1,$tweak + pand $twmask, $twres # isolate carry and residue + movdqu ($inp), @XMM[0] + pxor $twres, @XMM[7] + + lea 0x20(%rbp), $arg1 + pxor @XMM[7], @XMM[0] + lea 0x20(%rbp), $arg2 + movdqa @XMM[0], 0x20(%rbp) + lea ($key), $arg3 + call asm_AES_decrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[7] + mov $out, %rdx + movdqu @XMM[7], ($out) + +.Lxts_dec_steal: + movzb 16($inp), %eax + movzb (%rdx), %ecx + lea 1($inp), $inp + mov %al, (%rdx) + mov %cl, 16(%rdx) + lea 1(%rdx), %rdx + sub \$1,%ebx + jnz .Lxts_dec_steal + + movdqu ($out), @XMM[0] + lea 0x20(%rbp), $arg1 + pxor @XMM[6], @XMM[0] + lea 0x20(%rbp), $arg2 + movdqa @XMM[0], 0x20(%rbp) + lea ($key), $arg3 + call asm_AES_decrypt # doesn't touch %xmm + pxor 0x20(%rbp), @XMM[6] + movdqu @XMM[6], ($out) + +.Lxts_dec_ret: + lea (%rsp), %rax + pxor %xmm0, %xmm0 +.Lxts_dec_bzero: # wipe key schedule [if any] + movdqa %xmm0, 0x00(%rax) + movdqa %xmm0, 0x10(%rax) + lea 0x20(%rax), %rax + cmp %rax, %rbp + ja .Lxts_dec_bzero + + lea (%rbp),%rsp # restore %rsp +___ +$code.=<<___ if ($win64); + movaps 0x40(%rbp), %xmm6 + movaps 0x50(%rbp), %xmm7 + movaps 0x60(%rbp), %xmm8 + movaps 0x70(%rbp), %xmm9 + movaps 0x80(%rbp), %xmm10 + movaps 0x90(%rbp), %xmm11 + movaps 0xa0(%rbp), %xmm12 + movaps 0xb0(%rbp), %xmm13 + movaps 0xc0(%rbp), %xmm14 + movaps 0xd0(%rbp), %xmm15 + lea 0xa0(%rbp), %rsp +___ +$code.=<<___; + mov 0x48(%rsp), %r15 + mov 0x50(%rsp), %r14 + mov 0x58(%rsp), %r13 + mov 0x60(%rsp), %r12 + mov 0x68(%rsp), %rbx + mov 0x70(%rsp), %rax + lea 0x78(%rsp), %rsp + mov %rax, %rbp +.Lxts_dec_epilogue: + ret +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +.type _bsaes_const,\@object +.align 64 +_bsaes_const: +.LM0ISR: # InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: # bit-slice constants + .quad 0x5555555555555555, 0x5555555555555555 +.LBS1: + .quad 0x3333333333333333, 0x3333333333333333 +.LBS2: + .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: # shiftrows constants + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: # byte-swap upper dword + .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: + .quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: # counter increment constants + .quad 0x0000000000000000, 0x0000000100000000 +.LADD2: + .quad 0x0000000000000000, 0x0000000200000000 +.LADD3: + .quad 0x0000000000000000, 0x0000000300000000 +.LADD4: + .quad 0x0000000000000000, 0x0000000400000000 +.LADD5: + .quad 0x0000000000000000, 0x0000000500000000 +.LADD6: + .quad 0x0000000000000000, 0x0000000600000000 +.LADD7: + .quad 0x0000000000000000, 0x0000000700000000 +.LADD8: + .quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: + .long 0x87,0,1,0 +.Lmasks: + .quad 0x0101010101010101, 0x0101010101010101 + .quad 0x0202020202020202, 0x0202020202020202 + .quad 0x0404040404040404, 0x0404040404040404 + .quad 0x0808080808080808, 0x0808080808080808 +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: + .quad 0x6363636363636363, 0x6363636363636363 +.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov" +.align 64 +.size _bsaes_const,.-_bsaes_const +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + mov 160($context),%rax # pull context->Rbp + + lea 0x40(%rax),%rsi # %xmm save area + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0xa0(%rax),%rax # adjust stack pointer + + mov 0x70(%rax),%rbp + mov 0x68(%rax),%rbx + mov 0x60(%rax),%r12 + mov 0x58(%rax),%r13 + mov 0x50(%rax),%r14 + mov 0x48(%rax),%r15 + lea 0x78(%rax),%rax # adjust stack pointer + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lin_prologue: + mov %rax,152($context) # restore context->Rsp + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$`1232/8`,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 +___ +$code.=<<___ if ($ecb); + .rva .Lecb_enc_prologue + .rva .Lecb_enc_epilogue + .rva .Lecb_enc_info + + .rva .Lecb_dec_prologue + .rva .Lecb_dec_epilogue + .rva .Lecb_dec_info +___ +$code.=<<___; + .rva .Lcbc_dec_prologue + .rva .Lcbc_dec_epilogue + .rva .Lcbc_dec_info + + .rva .Lctr_enc_prologue + .rva .Lctr_enc_epilogue + .rva .Lctr_enc_info + + .rva .Lxts_enc_prologue + .rva .Lxts_enc_epilogue + .rva .Lxts_enc_info + + .rva .Lxts_dec_prologue + .rva .Lxts_dec_epilogue + .rva .Lxts_dec_info + +.section .xdata +.align 8 +___ +$code.=<<___ if ($ecb); +.Lecb_enc_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[] +.Lecb_dec_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[] +___ +$code.=<<___; +.Lcbc_dec_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[] +.Lctr_enc_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[] +.Lxts_enc_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[] +.Lxts_dec_info: + .byte 9,0,0,0 + .rva se_handler + .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[] +___ +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/aes/asm/vpaes-x86.S b/app/openssl/crypto/aes/asm/vpaes-x86.S new file mode 100644 index 00000000..c53a5074 --- /dev/null +++ b/app/openssl/crypto/aes/asm/vpaes-x86.S @@ -0,0 +1,661 @@ +.file "vpaes-x86.s" +.text +.align 64 +.L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 64 +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.size _vpaes_preheat,.-_vpaes_preheat +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%edx),%xmm5 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + pxor %xmm2,%xmm0 + addl $16,%edx + leal 192(%ebp),%ebx + jmp .L000enc_entry +.align 16 +.L001enc_loop: + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + movdqa 64(%ebp),%xmm5 +.byte 102,15,56,0,234 + movdqa -64(%ebx,%ecx,1),%xmm1 + movdqa 80(%ebp),%xmm2 +.byte 102,15,56,0,211 + pxor %xmm5,%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 + movdqa %xmm0,%xmm3 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm3,%xmm0 + subl $1,%eax +.L000enc_entry: + movdqa %xmm6,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm5 +.byte 102,15,56,0,232 + pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm5,%xmm3 + movdqa %xmm7,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm5,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm7,%xmm3 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + jnz .L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + movl 240(%edx),%eax + leal 608(%ebp),%ebx + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp .L002dec_entry +.align 16 +.L003dec_loop: + movdqa -32(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa -16(%ebx),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,56,0,197 + movdqa (%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + subl $1,%eax +.byte 102,15,56,0,197 + movdqa 32(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%ebx),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqa 64(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%ebx),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,58,15,237,12 +.L002dec_entry: + movdqa %xmm6,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm7,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm7,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqu (%edx),%xmm0 + jnz .L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz .L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp .L005schedule_go +.L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +.L005schedule_go: + cmpl $192,%eax + ja .L006schedule_256 + je .L007schedule_192 +.L008schedule_128: + movl $10,%eax +.L009loop_schedule_128: + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + jmp .L009loop_schedule_128 +.align 16 +.L007schedule_192: + movdqu 8(%esi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +.L011loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .L011loop_schedule_192 +.align 16 +.L006schedule_256: + movdqu 16(%esi),%xmm0 + call _vpaes_schedule_transform + movl $7,%eax +.L012loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call .L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp .L012loop_schedule_256 +.align 16 +.L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz .L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +.L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 + pshufd $254,%xmm7,%xmm0 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 + movhlps %xmm1,%xmm6 + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +.L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz .L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp .L015schedule_mangle_both +.align 16 +.L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +.L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal .L_vpaes_consts+0x30-.L016pic_point,%ebp + call _vpaes_schedule_core +.L016pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal .L_vpaes_consts+0x30-.L017pic_point,%ebp + call _vpaes_schedule_core +.L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L018pic_point,%ebp + call _vpaes_preheat +.L018pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_encrypt,.-.L_vpaes_encrypt_begin +.globl vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L019pic_point,%ebp + call _vpaes_preheat +.L019pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_decrypt,.-.L_vpaes_decrypt_begin +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc .L020cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + call _vpaes_preheat +.L021pic_point: + cmpl $0,%ecx + je .L022cbc_dec_loop + jmp .L023cbc_enc_loop +.align 16 +.L023cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call _vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L023cbc_enc_loop + jmp .L024cbc_done +.align 16 +.L022cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call _vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L022cbc_dec_loop +.L024cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +.L020cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin diff --git a/app/openssl/crypto/aes/asm/vpaes-x86.pl b/app/openssl/crypto/aes/asm/vpaes-x86.pl new file mode 100644 index 00000000..1533e2c3 --- /dev/null +++ b/app/openssl/crypto/aes/asm/vpaes-x86.pl @@ -0,0 +1,903 @@ +#!/usr/bin/env perl + +###################################################################### +## Constant-time SSSE3 AES core implementation. +## version 0.1 +## +## By Mike Hamburg (Stanford University), 2009 +## Public domain. +## +## For details see http://shiftleft.org/papers/vector_aes/ and +## http://crypto.stanford.edu/vpaes/. + +###################################################################### +# September 2011. +# +# Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for +# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt +# doesn't handle partial vectors (doesn't have to if called from +# EVP only). "Drop-in" implies that this module doesn't share key +# schedule structure with the original nor does it make assumption +# about its alignment... +# +# Performance summary. aes-586.pl column lists large-block CBC +# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per +# byte processed with 128-bit key, and vpaes-x86.pl column - [also +# large-block CBC] encrypt/decrypt. +# +# aes-586.pl vpaes-x86.pl +# +# Core 2(**) 29.1/42.3/18.3 22.0/25.6(***) +# Nehalem 27.9/40.4/18.1 10.3/12.0 +# Atom 102./119./60.1 64.5/85.3(***) +# +# (*) "Hyper-threading" in the context refers rather to cache shared +# among multiple cores, than to specifically Intel HTT. As vast +# majority of contemporary cores share cache, slower code path +# is common place. In other words "with-hyper-threading-off" +# results are presented mostly for reference purposes. +# +# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. +# +# (***) Less impressive improvement on Core 2 and Atom is due to slow +# pshufb, yet it's respectable +32%/65% improvement on Core 2 +# and +58%/40% on Atom (as implied, over "hyper-threading-safe" +# code path). +# +# + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); + +$PREFIX="vpaes"; + +my ($round, $base, $magic, $key, $const, $inp, $out)= + ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); + +&static_label("_vpaes_consts"); +&static_label("_vpaes_schedule_low_round"); + +&set_label("_vpaes_consts",64); +$k_inv=-0x30; # inv, inva + &data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309); + &data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C); + +$k_s0F=-0x10; # s0F + &data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F); + +$k_ipt=0x00; # input transform (lo, hi) + &data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090); + &data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC); + +$k_sb1=0x20; # sb1u, sb1t + &data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E); + &data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1); +$k_sb2=0x40; # sb2u, sb2t + &data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955); + &data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8); +$k_sbo=0x60; # sbou, sbot + &data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A); + &data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1); + +$k_mc_forward=0x80; # mc_forward + &data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D); + &data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201); + &data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605); + &data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09); + +$k_mc_backward=0xc0; # mc_backward + &data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F); + &data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B); + &data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407); + &data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003); + +$k_sr=0x100; # sr + &data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C); + &data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C); + &data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C); + &data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C); + +$k_rcon=0x140; # rcon + &data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808); + +$k_s63=0x150; # s63: all equal to 0x63 transformed + &data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B); + +$k_opt=0x160; # output transform + &data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121); + &data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1); + +$k_deskew=0x180; # deskew tables: inverts the sbox's "skew" + &data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A); + &data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB); +## +## Decryption stuff +## Key schedule constants +## +$k_dksd=0x1a0; # decryption key schedule: invskew x*D + &data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4); + &data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA); +$k_dksb=0x1c0; # decryption key schedule: invskew x*B + &data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386); + &data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F); +$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63 + &data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C); + &data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A); +$k_dks9=0x200; # decryption key schedule: invskew x*9 + &data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334); + &data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC); + +## +## Decryption stuff +## Round function constants +## +$k_dipt=0x220; # decryption input transform + &data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E); + &data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772); + +$k_dsb9=0x240; # decryption sbox output *9*u, *9*t + &data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50); + &data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E); +$k_dsbd=0x260; # decryption sbox output *D*u, *D*t + &data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13); + &data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D); +$k_dsbb=0x280; # decryption sbox output *B*u, *B*t + &data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6); + &data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E); +$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t + &data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004); + &data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B); +$k_dsbo=0x2c0; # decryption sbox final output + &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); + &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); +&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"); +&align (64); + +&function_begin_B("_vpaes_preheat"); + &add ($const,&DWP(0,"esp")); + &movdqa ("xmm7",&QWP($k_inv,$const)); + &movdqa ("xmm6",&QWP($k_s0F,$const)); + &ret (); +&function_end_B("_vpaes_preheat"); + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm6-%xmm7 as in _vpaes_preheat +## (%edx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx +## +## +&function_begin_B("_vpaes_encrypt_core"); + &mov ($magic,16); + &mov ($round,&DWP(240,$key)); + &movdqa ("xmm1","xmm6") + &movdqa ("xmm2",&QWP($k_ipt,$const)); + &pandn ("xmm1","xmm0"); + &movdqu ("xmm5",&QWP(0,$key)); + &psrld ("xmm1",4); + &pand ("xmm0","xmm6"); + &pshufb ("xmm2","xmm0"); + &movdqa ("xmm0",&QWP($k_ipt+16,$const)); + &pshufb ("xmm0","xmm1"); + &pxor ("xmm2","xmm5"); + &pxor ("xmm0","xmm2"); + &add ($key,16); + &lea ($base,&DWP($k_mc_backward,$const)); + &jmp (&label("enc_entry")); + + +&set_label("enc_loop",16); + # middle of middle round + &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u + &pshufb ("xmm4","xmm2"); # 4 = sb1u + &pxor ("xmm4","xmm5"); # 4 = sb1u + k + &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t + &pshufb ("xmm0","xmm3"); # 0 = sb1t + &pxor ("xmm0","xmm4"); # 0 = A + &movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u + &pshufb ("xmm5","xmm2"); # 4 = sb2u + &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[] + &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t + &pshufb ("xmm2","xmm3"); # 2 = sb2t + &pxor ("xmm2","xmm5"); # 2 = 2A + &movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[] + &movdqa ("xmm3","xmm0"); # 3 = A + &pshufb ("xmm0","xmm1"); # 0 = B + &add ($key,16); # next key + &pxor ("xmm0","xmm2"); # 0 = 2A+B + &pshufb ("xmm3","xmm4"); # 3 = D + &add ($magic,16); # next mc + &pxor ("xmm3","xmm0"); # 3 = 2A+B+D + &pshufb ("xmm0","xmm1"); # 0 = 2B+C + &and ($magic,0x30); # ... mod 4 + &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D + &sub ($round,1); # nr-- + +&set_label("enc_entry"); + # top of round + &movdqa ("xmm1","xmm6"); # 1 : i + &pandn ("xmm1","xmm0"); # 1 = i<<4 + &psrld ("xmm1",4); # 1 = i + &pand ("xmm0","xmm6"); # 0 = k + &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k + &pshufb ("xmm5","xmm0"); # 2 = a/k + &pxor ("xmm0","xmm1"); # 0 = j + &movdqa ("xmm3","xmm7"); # 3 : 1/i + &pshufb ("xmm3","xmm1"); # 3 = 1/i + &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k + &movdqa ("xmm4","xmm7"); # 4 : 1/j + &pshufb ("xmm4","xmm0"); # 4 = 1/j + &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k + &movdqa ("xmm2","xmm7"); # 2 : 1/iak + &pshufb ("xmm2","xmm3"); # 2 = 1/iak + &pxor ("xmm2","xmm0"); # 2 = io + &movdqa ("xmm3","xmm7"); # 3 : 1/jak + &movdqu ("xmm5",&QWP(0,$key)); + &pshufb ("xmm3","xmm4"); # 3 = 1/jak + &pxor ("xmm3","xmm1"); # 3 = jo + &jnz (&label("enc_loop")); + + # middle of last round + &movdqa ("xmm4",&QWP($k_sbo,$const)); # 3 : sbou .Lk_sbo + &movdqa ("xmm0",&QWP($k_sbo+16,$const));# 3 : sbot .Lk_sbo+16 + &pshufb ("xmm4","xmm2"); # 4 = sbou + &pxor ("xmm4","xmm5"); # 4 = sb1u + k + &pshufb ("xmm0","xmm3"); # 0 = sb1t + &movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[] + &pxor ("xmm0","xmm4"); # 0 = A + &pshufb ("xmm0","xmm1"); + &ret (); +&function_end_B("_vpaes_encrypt_core"); + +## +## Decryption core +## +## Same API as encryption core. +## +&function_begin_B("_vpaes_decrypt_core"); + &mov ($round,&DWP(240,$key)); + &lea ($base,&DWP($k_dsbd,$const)); + &movdqa ("xmm1","xmm6"); + &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base)); + &pandn ("xmm1","xmm0"); + &mov ($magic,$round); + &psrld ("xmm1",4) + &movdqu ("xmm5",&QWP(0,$key)); + &shl ($magic,4); + &pand ("xmm0","xmm6"); + &pshufb ("xmm2","xmm0"); + &movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base)); + &xor ($magic,0x30); + &pshufb ("xmm0","xmm1"); + &and ($magic,0x30); + &pxor ("xmm2","xmm5"); + &movdqa ("xmm5",&QWP($k_mc_forward+48,$const)); + &pxor ("xmm0","xmm2"); + &add ($key,16); + &lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic)); + &jmp (&label("dec_entry")); + +&set_label("dec_loop",16); +## +## Inverse mix columns +## + &movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u + &pshufb ("xmm4","xmm2"); # 4 = sb9u + &pxor ("xmm4","xmm0"); + &movdqa ("xmm0",&QWP(-0x10,$base)); # 0 : sb9t + &pshufb ("xmm0","xmm3"); # 0 = sb9t + &pxor ("xmm0","xmm4"); # 0 = ch + &add ($key,16); # next round key + + &pshufb ("xmm0","xmm5"); # MC ch + &movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu + &pshufb ("xmm4","xmm2"); # 4 = sbdu + &pxor ("xmm4","xmm0"); # 4 = ch + &movdqa ("xmm0",&QWP(0x10,$base)); # 0 : sbdt + &pshufb ("xmm0","xmm3"); # 0 = sbdt + &pxor ("xmm0","xmm4"); # 0 = ch + &sub ($round,1); # nr-- + + &pshufb ("xmm0","xmm5"); # MC ch + &movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu + &pshufb ("xmm4","xmm2"); # 4 = sbbu + &pxor ("xmm4","xmm0"); # 4 = ch + &movdqa ("xmm0",&QWP(0x30,$base)); # 0 : sbbt + &pshufb ("xmm0","xmm3"); # 0 = sbbt + &pxor ("xmm0","xmm4"); # 0 = ch + + &pshufb ("xmm0","xmm5"); # MC ch + &movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu + &pshufb ("xmm4","xmm2"); # 4 = sbeu + &pxor ("xmm4","xmm0"); # 4 = ch + &movdqa ("xmm0",&QWP(0x50,$base)); # 0 : sbet + &pshufb ("xmm0","xmm3"); # 0 = sbet + &pxor ("xmm0","xmm4"); # 0 = ch + + &palignr("xmm5","xmm5",12); + +&set_label("dec_entry"); + # top of round + &movdqa ("xmm1","xmm6"); # 1 : i + &pandn ("xmm1","xmm0"); # 1 = i<<4 + &psrld ("xmm1",4); # 1 = i + &pand ("xmm0","xmm6"); # 0 = k + &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k + &pshufb ("xmm2","xmm0"); # 2 = a/k + &pxor ("xmm0","xmm1"); # 0 = j + &movdqa ("xmm3","xmm7"); # 3 : 1/i + &pshufb ("xmm3","xmm1"); # 3 = 1/i + &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k + &movdqa ("xmm4","xmm7"); # 4 : 1/j + &pshufb ("xmm4","xmm0"); # 4 = 1/j + &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k + &movdqa ("xmm2","xmm7"); # 2 : 1/iak + &pshufb ("xmm2","xmm3"); # 2 = 1/iak + &pxor ("xmm2","xmm0"); # 2 = io + &movdqa ("xmm3","xmm7"); # 3 : 1/jak + &pshufb ("xmm3","xmm4"); # 3 = 1/jak + &pxor ("xmm3","xmm1"); # 3 = jo + &movdqu ("xmm0",&QWP(0,$key)); + &jnz (&label("dec_loop")); + + # middle of last round + &movdqa ("xmm4",&QWP(0x60,$base)); # 3 : sbou + &pshufb ("xmm4","xmm2"); # 4 = sbou + &pxor ("xmm4","xmm0"); # 4 = sb1u + k + &movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot + &movdqa ("xmm2",&QWP(0,$magic)); + &pshufb ("xmm0","xmm3"); # 0 = sb1t + &pxor ("xmm0","xmm4"); # 0 = A + &pshufb ("xmm0","xmm2"); + &ret (); +&function_end_B("_vpaes_decrypt_core"); + +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## +&function_begin_B("_vpaes_schedule_core"); + &add ($const,&DWP(0,"esp")); + &movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned) + &movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon + + # input transform + &movdqa ("xmm3","xmm0"); + &lea ($base,&DWP($k_ipt,$const)); + &movdqa (&QWP(4,"esp"),"xmm2"); # xmm8 + &call ("_vpaes_schedule_transform"); + &movdqa ("xmm7","xmm0"); + + &test ($out,$out); + &jnz (&label("schedule_am_decrypting")); + + # encrypting, output zeroth round key after transform + &movdqu (&QWP(0,$key),"xmm0"); + &jmp (&label("schedule_go")); + +&set_label("schedule_am_decrypting"); + # decrypting, output zeroth round key after shiftrows + &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); + &pshufb ("xmm3","xmm1"); + &movdqu (&QWP(0,$key),"xmm3"); + &xor ($magic,0x30); + +&set_label("schedule_go"); + &cmp ($round,192); + &ja (&label("schedule_256")); + &je (&label("schedule_192")); + # 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +&set_label("schedule_128"); + &mov ($round,10); + +&set_label("loop_schedule_128"); + &call ("_vpaes_schedule_round"); + &dec ($round); + &jz (&label("schedule_mangle_last")); + &call ("_vpaes_schedule_mangle"); # write output + &jmp (&label("loop_schedule_128")); + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +&set_label("schedule_192",16); + &movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned) + &call ("_vpaes_schedule_transform"); # input transform + &movdqa ("xmm6","xmm0"); # save short part + &pxor ("xmm4","xmm4"); # clear 4 + &movhlps("xmm6","xmm4"); # clobber low side with zeros + &mov ($round,4); + +&set_label("loop_schedule_192"); + &call ("_vpaes_schedule_round"); + &palignr("xmm0","xmm6",8); + &call ("_vpaes_schedule_mangle"); # save key n + &call ("_vpaes_schedule_192_smear"); + &call ("_vpaes_schedule_mangle"); # save key n+1 + &call ("_vpaes_schedule_round"); + &dec ($round); + &jz (&label("schedule_mangle_last")); + &call ("_vpaes_schedule_mangle"); # save key n+2 + &call ("_vpaes_schedule_192_smear"); + &jmp (&label("loop_schedule_192")); + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +&set_label("schedule_256",16); + &movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned) + &call ("_vpaes_schedule_transform"); # input transform + &mov ($round,7); + +&set_label("loop_schedule_256"); + &call ("_vpaes_schedule_mangle"); # output low result + &movdqa ("xmm6","xmm0"); # save cur_lo in xmm6 + + # high round + &call ("_vpaes_schedule_round"); + &dec ($round); + &jz (&label("schedule_mangle_last")); + &call ("_vpaes_schedule_mangle"); + + # low round. swap xmm7 and xmm6 + &pshufd ("xmm0","xmm0",0xFF); + &movdqa (&QWP(20,"esp"),"xmm7"); + &movdqa ("xmm7","xmm6"); + &call ("_vpaes_schedule_low_round"); + &movdqa ("xmm7",&QWP(20,"esp")); + + &jmp (&label("loop_schedule_256")); + +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +&set_label("schedule_mangle_last",16); + # schedule last round key from xmm0 + &lea ($base,&DWP($k_deskew,$const)); + &test ($out,$out); + &jnz (&label("schedule_mangle_last_dec")); + + # encrypting + &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); + &pshufb ("xmm0","xmm1"); # output permute + &lea ($base,&DWP($k_opt,$const)); # prepare to output transform + &add ($key,32); + +&set_label("schedule_mangle_last_dec"); + &add ($key,-16); + &pxor ("xmm0",&QWP($k_s63,$const)); + &call ("_vpaes_schedule_transform"); # output transform + &movdqu (&QWP(0,$key),"xmm0"); # save last key + + # cleanup + &pxor ("xmm0","xmm0"); + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &pxor ("xmm6","xmm6"); + &pxor ("xmm7","xmm7"); + &ret (); +&function_end_B("_vpaes_schedule_core"); + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. +## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +&function_begin_B("_vpaes_schedule_192_smear"); + &pshufd ("xmm0","xmm6",0x80); # d c 0 0 -> c 0 0 0 + &pxor ("xmm6","xmm0"); # -> c+d c 0 0 + &pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a + &pxor ("xmm6","xmm0"); # -> b+c+d b+c b a + &movdqa ("xmm0","xmm6"); + &pxor ("xmm1","xmm1"); + &movhlps("xmm6","xmm1"); # clobber low side with zeros + &ret (); +&function_end_B("_vpaes_schedule_192_smear"); + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm5. +## +&function_begin_B("_vpaes_schedule_round"); + # extract rcon from xmm8 + &movdqa ("xmm2",&QWP(8,"esp")); # xmm8 + &pxor ("xmm1","xmm1"); + &palignr("xmm1","xmm2",15); + &palignr("xmm2","xmm2",15); + &pxor ("xmm7","xmm1"); + + # rotate + &pshufd ("xmm0","xmm0",0xFF); + &palignr("xmm0","xmm0",1); + + # fall through... + &movdqa (&QWP(8,"esp"),"xmm2"); # xmm8 + + # low round: same as high round, but no rotation and no rcon. +&set_label("_vpaes_schedule_low_round"); + # smear xmm7 + &movdqa ("xmm1","xmm7"); + &pslldq ("xmm7",4); + &pxor ("xmm7","xmm1"); + &movdqa ("xmm1","xmm7"); + &pslldq ("xmm7",8); + &pxor ("xmm7","xmm1"); + &pxor ("xmm7",&QWP($k_s63,$const)); + + # subbyte + &movdqa ("xmm4",&QWP($k_s0F,$const)); + &movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j + &movdqa ("xmm1","xmm4"); + &pandn ("xmm1","xmm0"); + &psrld ("xmm1",4); # 1 = i + &pand ("xmm0","xmm4"); # 0 = k + &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k + &pshufb ("xmm2","xmm0"); # 2 = a/k + &pxor ("xmm0","xmm1"); # 0 = j + &movdqa ("xmm3","xmm5"); # 3 : 1/i + &pshufb ("xmm3","xmm1"); # 3 = 1/i + &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k + &movdqa ("xmm4","xmm5"); # 4 : 1/j + &pshufb ("xmm4","xmm0"); # 4 = 1/j + &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k + &movdqa ("xmm2","xmm5"); # 2 : 1/iak + &pshufb ("xmm2","xmm3"); # 2 = 1/iak + &pxor ("xmm2","xmm0"); # 2 = io + &movdqa ("xmm3","xmm5"); # 3 : 1/jak + &pshufb ("xmm3","xmm4"); # 3 = 1/jak + &pxor ("xmm3","xmm1"); # 3 = jo + &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou + &pshufb ("xmm4","xmm2"); # 4 = sbou + &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot + &pshufb ("xmm0","xmm3"); # 0 = sb1t + &pxor ("xmm0","xmm4"); # 0 = sbox output + + # add in smeared stuff + &pxor ("xmm0","xmm7"); + &movdqa ("xmm7","xmm0"); + &ret (); +&function_end_B("_vpaes_schedule_round"); + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%ebx) +## +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## +&function_begin_B("_vpaes_schedule_transform"); + &movdqa ("xmm2",&QWP($k_s0F,$const)); + &movdqa ("xmm1","xmm2"); + &pandn ("xmm1","xmm0"); + &psrld ("xmm1",4); + &pand ("xmm0","xmm2"); + &movdqa ("xmm2",&QWP(0,$base)); + &pshufb ("xmm2","xmm0"); + &movdqa ("xmm0",&QWP(16,$base)); + &pshufb ("xmm0","xmm1"); + &pxor ("xmm0","xmm2"); + &ret (); +&function_end_B("_vpaes_schedule_transform"); + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%edx), and increments or decrements it +## Keeps track of round number mod 4 in %ecx +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +&function_begin_B("_vpaes_schedule_mangle"); + &movdqa ("xmm4","xmm0"); # save xmm0 for later + &movdqa ("xmm5",&QWP($k_mc_forward,$const)); + &test ($out,$out); + &jnz (&label("schedule_mangle_dec")); + + # encrypting + &add ($key,16); + &pxor ("xmm4",&QWP($k_s63,$const)); + &pshufb ("xmm4","xmm5"); + &movdqa ("xmm3","xmm4"); + &pshufb ("xmm4","xmm5"); + &pxor ("xmm3","xmm4"); + &pshufb ("xmm4","xmm5"); + &pxor ("xmm3","xmm4"); + + &jmp (&label("schedule_mangle_both")); + +&set_label("schedule_mangle_dec",16); + # inverse mix columns + &movdqa ("xmm2",&QWP($k_s0F,$const)); + &lea ($inp,&DWP($k_dksd,$const)); + &movdqa ("xmm1","xmm2"); + &pandn ("xmm1","xmm4"); + &psrld ("xmm1",4); # 1 = hi + &pand ("xmm4","xmm2"); # 4 = lo + + &movdqa ("xmm2",&QWP(0,$inp)); + &pshufb ("xmm2","xmm4"); + &movdqa ("xmm3",&QWP(0x10,$inp)); + &pshufb ("xmm3","xmm1"); + &pxor ("xmm3","xmm2"); + &pshufb ("xmm3","xmm5"); + + &movdqa ("xmm2",&QWP(0x20,$inp)); + &pshufb ("xmm2","xmm4"); + &pxor ("xmm2","xmm3"); + &movdqa ("xmm3",&QWP(0x30,$inp)); + &pshufb ("xmm3","xmm1"); + &pxor ("xmm3","xmm2"); + &pshufb ("xmm3","xmm5"); + + &movdqa ("xmm2",&QWP(0x40,$inp)); + &pshufb ("xmm2","xmm4"); + &pxor ("xmm2","xmm3"); + &movdqa ("xmm3",&QWP(0x50,$inp)); + &pshufb ("xmm3","xmm1"); + &pxor ("xmm3","xmm2"); + &pshufb ("xmm3","xmm5"); + + &movdqa ("xmm2",&QWP(0x60,$inp)); + &pshufb ("xmm2","xmm4"); + &pxor ("xmm2","xmm3"); + &movdqa ("xmm3",&QWP(0x70,$inp)); + &pshufb ("xmm3","xmm1"); + &pxor ("xmm3","xmm2"); + + &add ($key,-16); + +&set_label("schedule_mangle_both"); + &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); + &pshufb ("xmm3","xmm1"); + &add ($magic,-16); + &and ($magic,0x30); + &movdqu (&QWP(0,$key),"xmm3"); + &ret (); +&function_end_B("_vpaes_schedule_mangle"); + +# +# Interface to OpenSSL +# +&function_begin("${PREFIX}_set_encrypt_key"); + &mov ($inp,&wparam(0)); # inp + &lea ($base,&DWP(-56,"esp")); + &mov ($round,&wparam(1)); # bits + &and ($base,-16); + &mov ($key,&wparam(2)); # key + &xchg ($base,"esp"); # alloca + &mov (&DWP(48,"esp"),$base); + + &mov ($base,$round); + &shr ($base,5); + &add ($base,5); + &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5; + &mov ($magic,0x30); + &mov ($out,0); + + &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &call ("_vpaes_schedule_core"); +&set_label("pic_point"); + + &mov ("esp",&DWP(48,"esp")); + &xor ("eax","eax"); +&function_end("${PREFIX}_set_encrypt_key"); + +&function_begin("${PREFIX}_set_decrypt_key"); + &mov ($inp,&wparam(0)); # inp + &lea ($base,&DWP(-56,"esp")); + &mov ($round,&wparam(1)); # bits + &and ($base,-16); + &mov ($key,&wparam(2)); # key + &xchg ($base,"esp"); # alloca + &mov (&DWP(48,"esp"),$base); + + &mov ($base,$round); + &shr ($base,5); + &add ($base,5); + &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5; + &shl ($base,4); + &lea ($key,&DWP(16,$key,$base)); + + &mov ($out,1); + &mov ($magic,$round); + &shr ($magic,1); + &and ($magic,32); + &xor ($magic,32); # nbist==192?0:32; + + &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &call ("_vpaes_schedule_core"); +&set_label("pic_point"); + + &mov ("esp",&DWP(48,"esp")); + &xor ("eax","eax"); +&function_end("${PREFIX}_set_decrypt_key"); + +&function_begin("${PREFIX}_encrypt"); + &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &call ("_vpaes_preheat"); +&set_label("pic_point"); + &mov ($inp,&wparam(0)); # inp + &lea ($base,&DWP(-56,"esp")); + &mov ($out,&wparam(1)); # out + &and ($base,-16); + &mov ($key,&wparam(2)); # key + &xchg ($base,"esp"); # alloca + &mov (&DWP(48,"esp"),$base); + + &movdqu ("xmm0",&QWP(0,$inp)); + &call ("_vpaes_encrypt_core"); + &movdqu (&QWP(0,$out),"xmm0"); + + &mov ("esp",&DWP(48,"esp")); +&function_end("${PREFIX}_encrypt"); + +&function_begin("${PREFIX}_decrypt"); + &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &call ("_vpaes_preheat"); +&set_label("pic_point"); + &mov ($inp,&wparam(0)); # inp + &lea ($base,&DWP(-56,"esp")); + &mov ($out,&wparam(1)); # out + &and ($base,-16); + &mov ($key,&wparam(2)); # key + &xchg ($base,"esp"); # alloca + &mov (&DWP(48,"esp"),$base); + + &movdqu ("xmm0",&QWP(0,$inp)); + &call ("_vpaes_decrypt_core"); + &movdqu (&QWP(0,$out),"xmm0"); + + &mov ("esp",&DWP(48,"esp")); +&function_end("${PREFIX}_decrypt"); + +&function_begin("${PREFIX}_cbc_encrypt"); + &mov ($inp,&wparam(0)); # inp + &mov ($out,&wparam(1)); # out + &mov ($round,&wparam(2)); # len + &mov ($key,&wparam(3)); # key + &sub ($round,16); + &jc (&label("cbc_abort")); + &lea ($base,&DWP(-56,"esp")); + &mov ($const,&wparam(4)); # ivp + &and ($base,-16); + &mov ($magic,&wparam(5)); # enc + &xchg ($base,"esp"); # alloca + &movdqu ("xmm1",&QWP(0,$const)); # load IV + &sub ($out,$inp); + &mov (&DWP(48,"esp"),$base); + + &mov (&DWP(0,"esp"),$out); # save out + &mov (&DWP(4,"esp"),$key) # save key + &mov (&DWP(8,"esp"),$const); # save ivp + &mov ($out,$round); # $out works as $len + + &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &call ("_vpaes_preheat"); +&set_label("pic_point"); + &cmp ($magic,0); + &je (&label("cbc_dec_loop")); + &jmp (&label("cbc_enc_loop")); + +&set_label("cbc_enc_loop",16); + &movdqu ("xmm0",&QWP(0,$inp)); # load input + &pxor ("xmm0","xmm1"); # inp^=iv + &call ("_vpaes_encrypt_core"); + &mov ($base,&DWP(0,"esp")); # restore out + &mov ($key,&DWP(4,"esp")); # restore key + &movdqa ("xmm1","xmm0"); + &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output + &lea ($inp,&DWP(16,$inp)); + &sub ($out,16); + &jnc (&label("cbc_enc_loop")); + &jmp (&label("cbc_done")); + +&set_label("cbc_dec_loop",16); + &movdqu ("xmm0",&QWP(0,$inp)); # load input + &movdqa (&QWP(16,"esp"),"xmm1"); # save IV + &movdqa (&QWP(32,"esp"),"xmm0"); # save future IV + &call ("_vpaes_decrypt_core"); + &mov ($base,&DWP(0,"esp")); # restore out + &mov ($key,&DWP(4,"esp")); # restore key + &pxor ("xmm0",&QWP(16,"esp")); # out^=iv + &movdqa ("xmm1",&QWP(32,"esp")); # load next IV + &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output + &lea ($inp,&DWP(16,$inp)); + &sub ($out,16); + &jnc (&label("cbc_dec_loop")); + +&set_label("cbc_done"); + &mov ($base,&DWP(8,"esp")); # restore ivp + &mov ("esp",&DWP(48,"esp")); + &movdqu (&QWP(0,$base),"xmm1"); # write IV +&set_label("cbc_abort"); +&function_end("${PREFIX}_cbc_encrypt"); + +&asm_finish(); diff --git a/app/openssl/crypto/aes/asm/vpaes-x86_64.S b/app/openssl/crypto/aes/asm/vpaes-x86_64.S new file mode 100644 index 00000000..0162631f --- /dev/null +++ b/app/openssl/crypto/aes/asm/vpaes-x86_64.S @@ -0,0 +1,828 @@ +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + pxor %xmm2,%xmm0 + addq $16,%r9 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + movdqa %xmm15,%xmm5 +.byte 102,15,56,0,234 + movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm3,%xmm0 + subq $1,%rax + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 +.byte 102,15,56,0,232 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 + movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + addq $16,%r9 + +.byte 102,15,56,0,197 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + subq $1,%rax + +.byte 102,15,56,0,197 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,56,0,197 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,58,15,237,12 + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 + pshufd $254,%xmm7,%xmm0 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + .byte 0xf3,0xc3 +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 64 +.size _vpaes_consts,.-_vpaes_consts diff --git a/app/openssl/crypto/aes/asm/vpaes-x86_64.pl b/app/openssl/crypto/aes/asm/vpaes-x86_64.pl new file mode 100644 index 00000000..bd7f45b8 --- /dev/null +++ b/app/openssl/crypto/aes/asm/vpaes-x86_64.pl @@ -0,0 +1,1207 @@ +#!/usr/bin/env perl + +###################################################################### +## Constant-time SSSE3 AES core implementation. +## version 0.1 +## +## By Mike Hamburg (Stanford University), 2009 +## Public domain. +## +## For details see http://shiftleft.org/papers/vector_aes/ and +## http://crypto.stanford.edu/vpaes/. + +###################################################################### +# September 2011. +# +# Interface to OpenSSL as "almost" drop-in replacement for +# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt +# doesn't handle partial vectors (doesn't have to if called from +# EVP only). "Drop-in" implies that this module doesn't share key +# schedule structure with the original nor does it make assumption +# about its alignment... +# +# Performance summary. aes-x86_64.pl column lists large-block CBC +# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per +# byte processed with 128-bit key, and vpaes-x86_64.pl column - +# [also large-block CBC] encrypt/decrypt. +# +# aes-x86_64.pl vpaes-x86_64.pl +# +# Core 2(**) 30.5/43.7/14.3 21.8/25.7(***) +# Nehalem 30.5/42.2/14.6 9.8/11.8 +# Atom 63.9/79.0/32.1 64.0/84.8(***) +# +# (*) "Hyper-threading" in the context refers rather to cache shared +# among multiple cores, than to specifically Intel HTT. As vast +# majority of contemporary cores share cache, slower code path +# is common place. In other words "with-hyper-threading-off" +# results are presented mostly for reference purposes. +# +# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. +# +# (***) Less impressive improvement on Core 2 and Atom is due to slow +# pshufb, yet it's respectable +40%/78% improvement on Core 2 +# (as implied, over "hyper-threading-safe" code path). +# +# + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$PREFIX="vpaes"; + +$code.=<<___; +.text + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in _vpaes_preheat +## (%rdx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax +## Preserves %xmm6 - %xmm8 so you get some local vectors +## +## +.type _vpaes_encrypt_core,\@abi-omnipotent +.align 16 +_vpaes_encrypt_core: + mov %rdx, %r9 + mov \$16, %r11 + mov 240(%rdx),%eax + movdqa %xmm9, %xmm1 + movdqa .Lk_ipt(%rip), %xmm2 # iptlo + pandn %xmm0, %xmm1 + movdqu (%r9), %xmm5 # round0 key + psrld \$4, %xmm1 + pand %xmm9, %xmm0 + pshufb %xmm0, %xmm2 + movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi + pshufb %xmm1, %xmm0 + pxor %xmm5, %xmm2 + pxor %xmm2, %xmm0 + add \$16, %r9 + lea .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + # middle of middle round + movdqa %xmm13, %xmm4 # 4 : sb1u + pshufb %xmm2, %xmm4 # 4 = sb1u + pxor %xmm5, %xmm4 # 4 = sb1u + k + movdqa %xmm12, %xmm0 # 0 : sb1t + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = A + movdqa %xmm15, %xmm5 # 4 : sb2u + pshufb %xmm2, %xmm5 # 4 = sb2u + movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + movdqa %xmm14, %xmm2 # 2 : sb2t + pshufb %xmm3, %xmm2 # 2 = sb2t + pxor %xmm5, %xmm2 # 2 = 2A + movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + movdqa %xmm0, %xmm3 # 3 = A + pshufb %xmm1, %xmm0 # 0 = B + add \$16, %r9 # next key + pxor %xmm2, %xmm0 # 0 = 2A+B + pshufb %xmm4, %xmm3 # 3 = D + add \$16, %r11 # next mc + pxor %xmm0, %xmm3 # 3 = 2A+B+D + pshufb %xmm1, %xmm0 # 0 = 2B+C + and \$0x30, %r11 # ... mod 4 + pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D + sub \$1,%rax # nr-- + +.Lenc_entry: + # top of round + movdqa %xmm9, %xmm1 # 1 : i + pandn %xmm0, %xmm1 # 1 = i<<4 + psrld \$4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm5 # 2 : a/k + pshufb %xmm0, %xmm5 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + movdqu (%r9), %xmm5 + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + jnz .Lenc_loop + + # middle of last round + movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + pshufb %xmm2, %xmm4 # 4 = sbou + pxor %xmm5, %xmm4 # 4 = sb1u + k + pshufb %xmm3, %xmm0 # 0 = sb1t + movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + pxor %xmm4, %xmm0 # 0 = A + pshufb %xmm1, %xmm0 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + +## +## Decryption core +## +## Same API as encryption core. +## +.type _vpaes_decrypt_core,\@abi-omnipotent +.align 16 +_vpaes_decrypt_core: + mov %rdx, %r9 # load key + mov 240(%rdx),%eax + movdqa %xmm9, %xmm1 + movdqa .Lk_dipt(%rip), %xmm2 # iptlo + pandn %xmm0, %xmm1 + mov %rax, %r11 + psrld \$4, %xmm1 + movdqu (%r9), %xmm5 # round0 key + shl \$4, %r11 + pand %xmm9, %xmm0 + pshufb %xmm0, %xmm2 + movdqa .Lk_dipt+16(%rip), %xmm0 # ipthi + xor \$0x30, %r11 + lea .Lk_dsbd(%rip),%r10 + pshufb %xmm1, %xmm0 + and \$0x30, %r11 + pxor %xmm5, %xmm2 + movdqa .Lk_mc_forward+48(%rip), %xmm5 + pxor %xmm2, %xmm0 + add \$16, %r9 + add %r10, %r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: +## +## Inverse mix columns +## + movdqa -0x20(%r10),%xmm4 # 4 : sb9u + pshufb %xmm2, %xmm4 # 4 = sb9u + pxor %xmm0, %xmm4 + movdqa -0x10(%r10),%xmm0 # 0 : sb9t + pshufb %xmm3, %xmm0 # 0 = sb9t + pxor %xmm4, %xmm0 # 0 = ch + add \$16, %r9 # next round key + + pshufb %xmm5, %xmm0 # MC ch + movdqa 0x00(%r10),%xmm4 # 4 : sbdu + pshufb %xmm2, %xmm4 # 4 = sbdu + pxor %xmm0, %xmm4 # 4 = ch + movdqa 0x10(%r10),%xmm0 # 0 : sbdt + pshufb %xmm3, %xmm0 # 0 = sbdt + pxor %xmm4, %xmm0 # 0 = ch + sub \$1,%rax # nr-- + + pshufb %xmm5, %xmm0 # MC ch + movdqa 0x20(%r10),%xmm4 # 4 : sbbu + pshufb %xmm2, %xmm4 # 4 = sbbu + pxor %xmm0, %xmm4 # 4 = ch + movdqa 0x30(%r10),%xmm0 # 0 : sbbt + pshufb %xmm3, %xmm0 # 0 = sbbt + pxor %xmm4, %xmm0 # 0 = ch + + pshufb %xmm5, %xmm0 # MC ch + movdqa 0x40(%r10),%xmm4 # 4 : sbeu + pshufb %xmm2, %xmm4 # 4 = sbeu + pxor %xmm0, %xmm4 # 4 = ch + movdqa 0x50(%r10),%xmm0 # 0 : sbet + pshufb %xmm3, %xmm0 # 0 = sbet + pxor %xmm4, %xmm0 # 0 = ch + + palignr \$12, %xmm5, %xmm5 + +.Ldec_entry: + # top of round + movdqa %xmm9, %xmm1 # 1 : i + pandn %xmm0, %xmm1 # 1 = i<<4 + psrld \$4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm2 # 2 : a/k + pshufb %xmm0, %xmm2 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + movdqu (%r9), %xmm0 + jnz .Ldec_loop + + # middle of last round + movdqa 0x60(%r10), %xmm4 # 3 : sbou + pshufb %xmm2, %xmm4 # 4 = sbou + pxor %xmm0, %xmm4 # 4 = sb1u + k + movdqa 0x70(%r10), %xmm0 # 0 : sbot + movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = A + pshufb %xmm2, %xmm0 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## +.type _vpaes_schedule_core,\@abi-omnipotent +.align 16 +_vpaes_schedule_core: + # rdi = key + # rsi = size in bits + # rdx = buffer + # rcx = direction. 0=encrypt, 1=decrypt + + call _vpaes_preheat # load the tables + movdqa .Lk_rcon(%rip), %xmm8 # load rcon + movdqu (%rdi), %xmm0 # load key (unaligned) + + # input transform + movdqa %xmm0, %xmm3 + lea .Lk_ipt(%rip), %r11 + call _vpaes_schedule_transform + movdqa %xmm0, %xmm7 + + lea .Lk_sr(%rip),%r10 + test %rcx, %rcx + jnz .Lschedule_am_decrypting + + # encrypting, output zeroth round key after transform + movdqu %xmm0, (%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + # decrypting, output zeroth round key after shiftrows + movdqa (%r8,%r10),%xmm1 + pshufb %xmm1, %xmm3 + movdqu %xmm3, (%rdx) + xor \$0x30, %r8 + +.Lschedule_go: + cmp \$192, %esi + ja .Lschedule_256 + je .Lschedule_192 + # 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +.Lschedule_128: + mov \$10, %esi + +.Loop_schedule_128: + call _vpaes_schedule_round + dec %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle # write output + jmp .Loop_schedule_128 + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + call _vpaes_schedule_transform # input transform + movdqa %xmm0, %xmm6 # save short part + pxor %xmm4, %xmm4 # clear 4 + movhlps %xmm4, %xmm6 # clobber low side with zeros + mov \$4, %esi + +.Loop_schedule_192: + call _vpaes_schedule_round + palignr \$8,%xmm6,%xmm0 + call _vpaes_schedule_mangle # save key n + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle # save key n+1 + call _vpaes_schedule_round + dec %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle # save key n+2 + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + call _vpaes_schedule_transform # input transform + mov \$7, %esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle # output low result + movdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + # high round + call _vpaes_schedule_round + dec %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + # low round. swap xmm7 and xmm6 + pshufd \$0xFF, %xmm0, %xmm0 + movdqa %xmm7, %xmm5 + movdqa %xmm6, %xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5, %xmm7 + + jmp .Loop_schedule_256 + + +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +.align 16 +.Lschedule_mangle_last: + # schedule last round key from xmm0 + lea .Lk_deskew(%rip),%r11 # prepare to deskew + test %rcx, %rcx + jnz .Lschedule_mangle_last_dec + + # encrypting + movdqa (%r8,%r10),%xmm1 + pshufb %xmm1, %xmm0 # output permute + lea .Lk_opt(%rip), %r11 # prepare to output transform + add \$32, %rdx + +.Lschedule_mangle_last_dec: + add \$-16, %rdx + pxor .Lk_s63(%rip), %xmm0 + call _vpaes_schedule_transform # output transform + movdqu %xmm0, (%rdx) # save last key + + # cleanup + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + pxor %xmm4, %xmm4 + pxor %xmm5, %xmm5 + pxor %xmm6, %xmm6 + pxor %xmm7, %xmm7 + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. +## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +.type _vpaes_schedule_192_smear,\@abi-omnipotent +.align 16 +_vpaes_schedule_192_smear: + pshufd \$0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0 + pxor %xmm0, %xmm6 # -> c+d c 0 0 + pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + pxor %xmm0, %xmm6 # -> b+c+d b+c b a + movdqa %xmm6, %xmm0 + pxor %xmm1, %xmm1 + movhlps %xmm1, %xmm6 # clobber low side with zeros + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## +.type _vpaes_schedule_round,\@abi-omnipotent +.align 16 +_vpaes_schedule_round: + # extract rcon from xmm8 + pxor %xmm1, %xmm1 + palignr \$15, %xmm8, %xmm1 + palignr \$15, %xmm8, %xmm8 + pxor %xmm1, %xmm7 + + # rotate + pshufd \$0xFF, %xmm0, %xmm0 + palignr \$1, %xmm0, %xmm0 + + # fall through... + + # low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + # smear xmm7 + movdqa %xmm7, %xmm1 + pslldq \$4, %xmm7 + pxor %xmm1, %xmm7 + movdqa %xmm7, %xmm1 + pslldq \$8, %xmm7 + pxor %xmm1, %xmm7 + pxor .Lk_s63(%rip), %xmm7 + + # subbytes + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld \$4, %xmm1 # 1 = i + pand %xmm9, %xmm0 # 0 = k + movdqa %xmm11, %xmm2 # 2 : a/k + pshufb %xmm0, %xmm2 # 2 = a/k + pxor %xmm1, %xmm0 # 0 = j + movdqa %xmm10, %xmm3 # 3 : 1/i + pshufb %xmm1, %xmm3 # 3 = 1/i + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k + movdqa %xmm10, %xmm4 # 4 : 1/j + pshufb %xmm0, %xmm4 # 4 = 1/j + pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k + movdqa %xmm10, %xmm2 # 2 : 1/iak + pshufb %xmm3, %xmm2 # 2 = 1/iak + pxor %xmm0, %xmm2 # 2 = io + movdqa %xmm10, %xmm3 # 3 : 1/jak + pshufb %xmm4, %xmm3 # 3 = 1/jak + pxor %xmm1, %xmm3 # 3 = jo + movdqa %xmm13, %xmm4 # 4 : sbou + pshufb %xmm2, %xmm4 # 4 = sbou + movdqa %xmm12, %xmm0 # 0 : sbot + pshufb %xmm3, %xmm0 # 0 = sb1t + pxor %xmm4, %xmm0 # 0 = sbox output + + # add in smeared stuff + pxor %xmm7, %xmm0 + movdqa %xmm0, %xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... as in preheat +## Output in %xmm0 +## Clobbers %xmm1, %xmm2 +## +.type _vpaes_schedule_transform,\@abi-omnipotent +.align 16 +_vpaes_schedule_transform: + movdqa %xmm9, %xmm1 + pandn %xmm0, %xmm1 + psrld \$4, %xmm1 + pand %xmm9, %xmm0 + movdqa (%r11), %xmm2 # lo + pshufb %xmm0, %xmm2 + movdqa 16(%r11), %xmm0 # hi + pshufb %xmm1, %xmm0 + pxor %xmm2, %xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +.type _vpaes_schedule_mangle,\@abi-omnipotent +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0, %xmm4 # save xmm0 for later + movdqa .Lk_mc_forward(%rip),%xmm5 + test %rcx, %rcx + jnz .Lschedule_mangle_dec + + # encrypting + add \$16, %rdx + pxor .Lk_s63(%rip),%xmm4 + pshufb %xmm5, %xmm4 + movdqa %xmm4, %xmm3 + pshufb %xmm5, %xmm4 + pxor %xmm4, %xmm3 + pshufb %xmm5, %xmm4 + pxor %xmm4, %xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + # inverse mix columns + lea .Lk_dksd(%rip),%r11 + movdqa %xmm9, %xmm1 + pandn %xmm4, %xmm1 + psrld \$4, %xmm1 # 1 = hi + pand %xmm9, %xmm4 # 4 = lo + + movdqa 0x00(%r11), %xmm2 + pshufb %xmm4, %xmm2 + movdqa 0x10(%r11), %xmm3 + pshufb %xmm1, %xmm3 + pxor %xmm2, %xmm3 + pshufb %xmm5, %xmm3 + + movdqa 0x20(%r11), %xmm2 + pshufb %xmm4, %xmm2 + pxor %xmm3, %xmm2 + movdqa 0x30(%r11), %xmm3 + pshufb %xmm1, %xmm3 + pxor %xmm2, %xmm3 + pshufb %xmm5, %xmm3 + + movdqa 0x40(%r11), %xmm2 + pshufb %xmm4, %xmm2 + pxor %xmm3, %xmm2 + movdqa 0x50(%r11), %xmm3 + pshufb %xmm1, %xmm3 + pxor %xmm2, %xmm3 + pshufb %xmm5, %xmm3 + + movdqa 0x60(%r11), %xmm2 + pshufb %xmm4, %xmm2 + pxor %xmm3, %xmm2 + movdqa 0x70(%r11), %xmm3 + pshufb %xmm1, %xmm3 + pxor %xmm2, %xmm3 + + add \$-16, %rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10),%xmm1 + pshufb %xmm1,%xmm3 + add \$-16, %r8 + and \$0x30, %r8 + movdqu %xmm3, (%rdx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + +# +# Interface to OpenSSL +# +.globl ${PREFIX}_set_encrypt_key +.type ${PREFIX}_set_encrypt_key,\@function,3 +.align 16 +${PREFIX}_set_encrypt_key: +___ +$code.=<<___ if ($win64); + lea -0xb8(%rsp),%rsp + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) +.Lenc_key_body: +___ +$code.=<<___; + mov %esi,%eax + shr \$5,%eax + add \$5,%eax + mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov \$0,%ecx + mov \$0x30,%r8d + call _vpaes_schedule_core +___ +$code.=<<___ if ($win64); + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 + lea 0xb8(%rsp),%rsp +.Lenc_key_epilogue: +___ +$code.=<<___; + xor %eax,%eax + ret +.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key + +.globl ${PREFIX}_set_decrypt_key +.type ${PREFIX}_set_decrypt_key,\@function,3 +.align 16 +${PREFIX}_set_decrypt_key: +___ +$code.=<<___ if ($win64); + lea -0xb8(%rsp),%rsp + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) +.Ldec_key_body: +___ +$code.=<<___; + mov %esi,%eax + shr \$5,%eax + add \$5,%eax + mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + shl \$4,%eax + lea 16(%rdx,%rax),%rdx + + mov \$1,%ecx + mov %esi,%r8d + shr \$1,%r8d + and \$32,%r8d + xor \$32,%r8d # nbits==192?0:32 + call _vpaes_schedule_core +___ +$code.=<<___ if ($win64); + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 + lea 0xb8(%rsp),%rsp +.Ldec_key_epilogue: +___ +$code.=<<___; + xor %eax,%eax + ret +.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key + +.globl ${PREFIX}_encrypt +.type ${PREFIX}_encrypt,\@function,3 +.align 16 +${PREFIX}_encrypt: +___ +$code.=<<___ if ($win64); + lea -0xb8(%rsp),%rsp + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) +.Lenc_body: +___ +$code.=<<___; + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) +___ +$code.=<<___ if ($win64); + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 + lea 0xb8(%rsp),%rsp +.Lenc_epilogue: +___ +$code.=<<___; + ret +.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt + +.globl ${PREFIX}_decrypt +.type ${PREFIX}_decrypt,\@function,3 +.align 16 +${PREFIX}_decrypt: +___ +$code.=<<___ if ($win64); + lea -0xb8(%rsp),%rsp + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) +.Ldec_body: +___ +$code.=<<___; + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) +___ +$code.=<<___ if ($win64); + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 + lea 0xb8(%rsp),%rsp +.Ldec_epilogue: +___ +$code.=<<___; + ret +.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt +___ +{ +my ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); +# void AES_cbc_encrypt (const void char *inp, unsigned char *out, +# size_t length, const AES_KEY *key, +# unsigned char *ivp,const int enc); +$code.=<<___; +.globl ${PREFIX}_cbc_encrypt +.type ${PREFIX}_cbc_encrypt,\@function,6 +.align 16 +${PREFIX}_cbc_encrypt: + xchg $key,$len +___ +($len,$key)=($key,$len); +$code.=<<___; + sub \$16,$len + jc .Lcbc_abort +___ +$code.=<<___ if ($win64); + lea -0xb8(%rsp),%rsp + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) +.Lcbc_body: +___ +$code.=<<___; + movdqu ($ivp),%xmm6 # load IV + sub $inp,$out + call _vpaes_preheat + cmp \$0,${enc}d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu ($inp),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,($out,$inp) + lea 16($inp),$inp + sub \$16,$len + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu ($inp),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,($out,$inp) + lea 16($inp),$inp + sub \$16,$len + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,($ivp) # save IV +___ +$code.=<<___ if ($win64); + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 + lea 0xb8(%rsp),%rsp +.Lcbc_epilogue: +___ +$code.=<<___; +.Lcbc_abort: + ret +.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt +___ +} +$code.=<<___; +## +## _aes_preheat +## +## Fills register %r10 -> .aes_consts (so you can -fPIC) +## and %xmm9-%xmm15 as specified below. +## +.type _vpaes_preheat,\@abi-omnipotent +.align 16 +_vpaes_preheat: + lea .Lk_s0F(%rip), %r10 + movdqa -0x20(%r10), %xmm10 # .Lk_inv + movdqa -0x10(%r10), %xmm11 # .Lk_inv+16 + movdqa 0x00(%r10), %xmm9 # .Lk_s0F + movdqa 0x30(%r10), %xmm13 # .Lk_sb1 + movdqa 0x40(%r10), %xmm12 # .Lk_sb1+16 + movdqa 0x50(%r10), %xmm15 # .Lk_sb2 + movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16 + ret +.size _vpaes_preheat,.-_vpaes_preheat +######################################################## +## ## +## Constants ## +## ## +######################################################## +.type _vpaes_consts,\@object +.align 64 +_vpaes_consts: +.Lk_inv: # inv, inva + .quad 0x0E05060F0D080180, 0x040703090A0B0C02 + .quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: # s0F + .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: # input transform (lo, hi) + .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 + .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: # sb1u, sb1t + .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 + .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: # sb2u, sb2t + .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: # sbou, sbot + .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 + .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: # mc_forward + .quad 0x0407060500030201, 0x0C0F0E0D080B0A09 + .quad 0x080B0A0904070605, 0x000302010C0F0E0D + .quad 0x0C0F0E0D080B0A09, 0x0407060500030201 + .quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward:# mc_backward + .quad 0x0605040702010003, 0x0E0D0C0F0A09080B + .quad 0x020100030E0D0C0F, 0x0A09080B06050407 + .quad 0x0E0D0C0F0A09080B, 0x0605040702010003 + .quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: # sr + .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 + .quad 0x030E09040F0A0500, 0x0B06010C07020D08 + .quad 0x0F060D040B020900, 0x070E050C030A0108 + .quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: # rcon + .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: # s63: all equal to 0x63 transformed + .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: # output transform + .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 + .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: # deskew tables: inverts the sbox's "skew" + .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A + .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + +## +## Decryption stuff +## Key schedule constants +## +.Lk_dksd: # decryption key schedule: invskew x*D + .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 + .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: # decryption key schedule: invskew x*B + .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 + .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: # decryption key schedule: invskew x*E + 0x63 + .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 + .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: # decryption key schedule: invskew x*9 + .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC + .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +## +## Decryption stuff +## Round function constants +## +.Lk_dipt: # decryption input transform + .quad 0x0F505B040B545F00, 0x154A411E114E451A + .quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: # decryption sbox output *9*u, *9*t + .quad 0x851C03539A86D600, 0xCAD51F504F994CC9 + .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: # decryption sbox output *D*u, *D*t + .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 + .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: # decryption sbox output *B*u, *B*t + .quad 0xD022649296B44200, 0x602646F6B0F2D404 + .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: # decryption sbox output *E*u, *E*t + .quad 0x46F2929626D4D000, 0x2242600464B4F6B0 + .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: # decryption sbox final output + .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D + .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" +.align 64 +.size _vpaes_consts,.-_vpaes_consts +___ + +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + lea 16(%rax),%rsi # %xmm save area + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + lea 0xb8(%rax),%rax # adjust stack pointer + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$`1232/8`,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_${PREFIX}_set_encrypt_key + .rva .LSEH_end_${PREFIX}_set_encrypt_key + .rva .LSEH_info_${PREFIX}_set_encrypt_key + + .rva .LSEH_begin_${PREFIX}_set_decrypt_key + .rva .LSEH_end_${PREFIX}_set_decrypt_key + .rva .LSEH_info_${PREFIX}_set_decrypt_key + + .rva .LSEH_begin_${PREFIX}_encrypt + .rva .LSEH_end_${PREFIX}_encrypt + .rva .LSEH_info_${PREFIX}_encrypt + + .rva .LSEH_begin_${PREFIX}_decrypt + .rva .LSEH_end_${PREFIX}_decrypt + .rva .LSEH_info_${PREFIX}_decrypt + + .rva .LSEH_begin_${PREFIX}_cbc_encrypt + .rva .LSEH_end_${PREFIX}_cbc_encrypt + .rva .LSEH_info_${PREFIX}_cbc_encrypt + +.section .xdata +.align 8 +.LSEH_info_${PREFIX}_set_encrypt_key: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc_key_body,.Lenc_key_epilogue # HandlerData[] +.LSEH_info_${PREFIX}_set_decrypt_key: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec_key_body,.Ldec_key_epilogue # HandlerData[] +.LSEH_info_${PREFIX}_encrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc_body,.Lenc_epilogue # HandlerData[] +.LSEH_info_${PREFIX}_decrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec_body,.Ldec_epilogue # HandlerData[] +.LSEH_info_${PREFIX}_cbc_encrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Lcbc_body,.Lcbc_epilogue # HandlerData[] +___ +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/arm_arch.h b/app/openssl/crypto/arm_arch.h new file mode 100644 index 00000000..5a831076 --- /dev/null +++ b/app/openssl/crypto/arm_arch.h @@ -0,0 +1,51 @@ +#ifndef __ARM_ARCH_H__ +#define __ARM_ARCH_H__ + +#if !defined(__ARM_ARCH__) +# if defined(__CC_ARM) +# define __ARM_ARCH__ __TARGET_ARCH_ARM +# if defined(__BIG_ENDIAN) +# define __ARMEB__ +# else +# define __ARMEL__ +# endif +# elif defined(__GNUC__) + /* + * Why doesn't gcc define __ARM_ARCH__? Instead it defines + * bunch of below macros. See all_architectires[] table in + * gcc/config/arm/arm.c. On a side note it defines + * __ARMEL__/__ARMEB__ for little-/big-endian. + */ +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \ + defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH__ 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) +# define __ARM_ARCH__ 4 +# else +# error "unsupported ARM architecture" +# endif +# endif +#endif + +#ifdef OPENSSL_FIPSCANISTER +#include +#endif + +#if !__ASSEMBLER__ +extern unsigned int OPENSSL_armcap_P; + +#define ARMV7_NEON (1<<0) +#define ARMV7_TICK (1<<1) +#endif + +#endif diff --git a/app/openssl/crypto/armcap.c b/app/openssl/crypto/armcap.c new file mode 100644 index 00000000..9abaf396 --- /dev/null +++ b/app/openssl/crypto/armcap.c @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include + +#include "arm_arch.h" + +unsigned int OPENSSL_armcap_P; + +static sigset_t all_masked; + +static sigjmp_buf ill_jmp; +static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } + +/* + * Following subroutines could have been inlined, but it's not all + * ARM compilers support inline assembler... + */ +void _armv7_neon_probe(void); +unsigned int _armv7_tick(void); + +unsigned int OPENSSL_rdtsc(void) + { + if (OPENSSL_armcap_P & ARMV7_TICK) + return _armv7_tick(); + else + return 0; + } + +#if defined(__GNUC__) && __GNUC__>=2 +void OPENSSL_cpuid_setup(void) __attribute__((constructor)); +#endif +void OPENSSL_cpuid_setup(void) + { + char *e; + struct sigaction ill_oact,ill_act; + sigset_t oset; + static int trigger=0; + + if (trigger) return; + trigger=1; + + if ((e=getenv("OPENSSL_armcap"))) + { + OPENSSL_armcap_P=strtoul(e,NULL,0); + return; + } + + sigfillset(&all_masked); + sigdelset(&all_masked,SIGILL); + sigdelset(&all_masked,SIGTRAP); + sigdelset(&all_masked,SIGFPE); + sigdelset(&all_masked,SIGBUS); + sigdelset(&all_masked,SIGSEGV); + + OPENSSL_armcap_P = 0; + + memset(&ill_act,0,sizeof(ill_act)); + ill_act.sa_handler = ill_handler; + ill_act.sa_mask = all_masked; + + sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset); + sigaction(SIGILL,&ill_act,&ill_oact); + + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv7_neon_probe(); + OPENSSL_armcap_P |= ARMV7_NEON; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv7_tick(); + OPENSSL_armcap_P |= ARMV7_TICK; + } + + sigaction (SIGILL,&ill_oact,NULL); + sigprocmask(SIG_SETMASK,&oset,NULL); + } diff --git a/app/openssl/crypto/armv4cpuid.S b/app/openssl/crypto/armv4cpuid.S new file mode 100644 index 00000000..2d618dea --- /dev/null +++ b/app/openssl/crypto/armv4cpuid.S @@ -0,0 +1,154 @@ +#include "arm_arch.h" + +.text +.code 32 + +.align 5 +.global _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + .word 0xf26ee1fe @ vorr q15,q15,q15 + .word 0xe12fff1e @ bx lr +.size _armv7_neon_probe,.-_armv7_neon_probe + +.global _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + mrc p15,0,r0,c9,c13,0 + .word 0xe12fff1e @ bx lr +.size _armv7_tick,.-_armv7_tick + +.global OPENSSL_atomic_add +.type OPENSSL_atomic_add,%function +OPENSSL_atomic_add: +#if __ARM_ARCH__>=6 +.Ladd: ldrex r2,[r0] + add r3,r2,r1 + strex r2,r3,[r0] + cmp r2,#0 + bne .Ladd + mov r0,r3 + .word 0xe12fff1e @ bx lr +#else + stmdb sp!,{r4-r6,lr} + ldr r2,.Lspinlock + adr r3,.Lspinlock + mov r4,r0 + mov r5,r1 + add r6,r3,r2 @ &spinlock + b .+8 +.Lspin: bl sched_yield + mov r0,#-1 + swp r0,r0,[r6] + cmp r0,#0 + bne .Lspin + + ldr r2,[r4] + add r2,r2,r5 + str r2,[r4] + str r0,[r6] @ release spinlock + ldmia sp!,{r4-r6,lr} + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +#endif +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.global OPENSSL_cleanse +.type OPENSSL_cleanse,%function +OPENSSL_cleanse: + eor ip,ip,ip + cmp r1,#7 + subhs r1,r1,#4 + bhs .Lot + cmp r1,#0 + beq .Lcleanse_done +.Little: + strb ip,[r0],#1 + subs r1,r1,#1 + bhi .Little + b .Lcleanse_done + +.Lot: tst r0,#3 + beq .Laligned + strb ip,[r0],#1 + sub r1,r1,#1 + b .Lot +.Laligned: + str ip,[r0],#4 + subs r1,r1,#4 + bhs .Laligned + adds r1,r1,#4 + bne .Little +.Lcleanse_done: + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.global OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,%function +OPENSSL_wipe_cpu: + ldr r0,.LOPENSSL_armcap + adr r1,.LOPENSSL_armcap + ldr r0,[r1,r0] + eor r2,r2,r2 + eor r3,r3,r3 + eor ip,ip,ip + tst r0,#1 + beq .Lwipe_done + .word 0xf3000150 @ veor q0, q0, q0 + .word 0xf3022152 @ veor q1, q1, q1 + .word 0xf3044154 @ veor q2, q2, q2 + .word 0xf3066156 @ veor q3, q3, q3 + .word 0xf34001f0 @ veor q8, q8, q8 + .word 0xf34221f2 @ veor q9, q9, q9 + .word 0xf34441f4 @ veor q10, q10, q10 + .word 0xf34661f6 @ veor q11, q11, q11 + .word 0xf34881f8 @ veor q12, q12, q12 + .word 0xf34aa1fa @ veor q13, q13, q13 + .word 0xf34cc1fc @ veor q14, q14, q14 + .word 0xf34ee1fe @ veor q15, q15, q15 +.Lwipe_done: + mov r0,sp + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.global OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,%function +OPENSSL_instrument_bus: + eor r0,r0,r0 + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.global OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,%function +OPENSSL_instrument_bus2: + eor r0,r0,r0 + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 + +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.LOPENSSL_armcap +#if __ARM_ARCH__>=6 +.align 5 +#else +.Lspinlock: +.word atomic_add_spinlock-.Lspinlock +.align 5 + +.data +.align 2 +atomic_add_spinlock: +.word 0 +#endif + +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P diff --git a/app/openssl/crypto/asn1/a_d2i_fp.c b/app/openssl/crypto/asn1/a_d2i_fp.c index ece40bc4..52b2ebdb 100644 --- a/app/openssl/crypto/asn1/a_d2i_fp.c +++ b/app/openssl/crypto/asn1/a_d2i_fp.c @@ -57,6 +57,7 @@ */ #include +#include #include "cryptlib.h" #include #include @@ -143,17 +144,11 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) BUF_MEM *b; unsigned char *p; int i; - int ret=-1; ASN1_const_CTX c; - int want=HEADER_SIZE; + size_t want=HEADER_SIZE; int eos=0; -#if defined(__GNUC__) && defined(__ia64) - /* pathetic compiler bug in all known versions as of Nov. 2002 */ - long off=0; -#else - int off=0; -#endif - int len=0; + size_t off=0; + size_t len=0; b=BUF_MEM_new(); if (b == NULL) @@ -169,7 +164,7 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) { want-=(len-off); - if (!BUF_MEM_grow_clean(b,len+want)) + if (len + want < len || !BUF_MEM_grow_clean(b,len+want)) { ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ERR_R_MALLOC_FAILURE); goto err; @@ -181,7 +176,14 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) goto err; } if (i > 0) + { + if (len+i < len) + { + ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ASN1_R_TOO_LONG); + goto err; + } len+=i; + } } /* else data already loaded */ @@ -206,6 +208,11 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) { /* no data body so go round again */ eos++; + if (eos < 0) + { + ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ASN1_R_HEADER_TOO_LONG); + goto err; + } want=HEADER_SIZE; } else if (eos && (c.slen == 0) && (c.tag == V_ASN1_EOC)) @@ -220,10 +227,16 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) else { /* suck in c.slen bytes of data */ - want=(int)c.slen; + want=c.slen; if (want > (len-off)) { want-=(len-off); + if (want > INT_MAX /* BIO_read takes an int length */ || + len+want < len) + { + ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ASN1_R_TOO_LONG); + goto err; + } if (!BUF_MEM_grow_clean(b,len+want)) { ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ERR_R_MALLOC_FAILURE); @@ -238,11 +251,18 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) ASN1_R_NOT_ENOUGH_DATA); goto err; } + /* This can't overflow because + * |len+want| didn't overflow. */ len+=i; - want -= i; + want-=i; } } - off+=(int)c.slen; + if (off + c.slen < off) + { + ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ASN1_R_TOO_LONG); + goto err; + } + off+=c.slen; if (eos <= 0) { break; @@ -252,9 +272,15 @@ static int asn1_d2i_read_bio(BIO *in, BUF_MEM **pb) } } + if (off > INT_MAX) + { + ASN1err(ASN1_F_ASN1_D2I_READ_BIO,ASN1_R_TOO_LONG); + goto err; + } + *pb = b; return off; err: if (b != NULL) BUF_MEM_free(b); - return(ret); + return -1; } diff --git a/app/openssl/crypto/asn1/a_digest.c b/app/openssl/crypto/asn1/a_digest.c index d00d9e22..cbdeea6a 100644 --- a/app/openssl/crypto/asn1/a_digest.c +++ b/app/openssl/crypto/asn1/a_digest.c @@ -87,7 +87,8 @@ int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, p=str; i2d(data,&p); - EVP_Digest(str, i, md, len, type, NULL); + if (!EVP_Digest(str, i, md, len, type, NULL)) + return 0; OPENSSL_free(str); return(1); } @@ -104,7 +105,8 @@ int ASN1_item_digest(const ASN1_ITEM *it, const EVP_MD *type, void *asn, i=ASN1_item_i2d(asn,&str, it); if (!str) return(0); - EVP_Digest(str, i, md, len, type, NULL); + if (!EVP_Digest(str, i, md, len, type, NULL)) + return 0; OPENSSL_free(str); return(1); } diff --git a/app/openssl/crypto/asn1/a_int.c b/app/openssl/crypto/asn1/a_int.c index 3348b876..297c45a9 100644 --- a/app/openssl/crypto/asn1/a_int.c +++ b/app/openssl/crypto/asn1/a_int.c @@ -116,7 +116,7 @@ int i2c_ASN1_INTEGER(ASN1_INTEGER *a, unsigned char **pp) int pad=0,ret,i,neg; unsigned char *p,*n,pb=0; - if ((a == NULL) || (a->data == NULL)) return(0); + if (a == NULL) return(0); neg=a->type & V_ASN1_NEG; if (a->length == 0) ret=1; @@ -386,8 +386,8 @@ long ASN1_INTEGER_get(const ASN1_INTEGER *a) if (a->length > (int)sizeof(long)) { - /* hmm... a bit ugly */ - return(0xffffffffL); + /* hmm... a bit ugly, return all ones */ + return -1; } if (a->data == NULL) return 0; diff --git a/app/openssl/crypto/asn1/a_sign.c b/app/openssl/crypto/asn1/a_sign.c index ff63bfc7..7b4a193d 100644 --- a/app/openssl/crypto/asn1/a_sign.c +++ b/app/openssl/crypto/asn1/a_sign.c @@ -184,9 +184,9 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, p=buf_in; i2d(data,&p); - EVP_SignInit_ex(&ctx,type, NULL); - EVP_SignUpdate(&ctx,(unsigned char *)buf_in,inl); - if (!EVP_SignFinal(&ctx,(unsigned char *)buf_out, + if (!EVP_SignInit_ex(&ctx,type, NULL) + || !EVP_SignUpdate(&ctx,(unsigned char *)buf_in,inl) + || !EVP_SignFinal(&ctx,(unsigned char *)buf_out, (unsigned int *)&outl,pkey)) { outl=0; @@ -218,65 +218,100 @@ int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, const EVP_MD *type) { EVP_MD_CTX ctx; + EVP_MD_CTX_init(&ctx); + if (!EVP_DigestSignInit(&ctx, NULL, type, NULL, pkey)) + { + EVP_MD_CTX_cleanup(&ctx); + return 0; + } + return ASN1_item_sign_ctx(it, algor1, algor2, signature, asn, &ctx); + } + + +int ASN1_item_sign_ctx(const ASN1_ITEM *it, + X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, void *asn, EVP_MD_CTX *ctx) + { + const EVP_MD *type; + EVP_PKEY *pkey; unsigned char *buf_in=NULL,*buf_out=NULL; - int inl=0,outl=0,outll=0; + size_t inl=0,outl=0,outll=0; int signid, paramtype; + int rv; + + type = EVP_MD_CTX_md(ctx); + pkey = EVP_PKEY_CTX_get0_pkey(ctx->pctx); - if (type == NULL) + if (!type || !pkey) { - int def_nid; - if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0) - type = EVP_get_digestbynid(def_nid); + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ASN1_R_CONTEXT_NOT_INITIALISED); + return 0; } - if (type == NULL) + if (pkey->ameth->item_sign) { - ASN1err(ASN1_F_ASN1_ITEM_SIGN, ASN1_R_NO_DEFAULT_DIGEST); - return 0; + rv = pkey->ameth->item_sign(ctx, it, asn, algor1, algor2, + signature); + if (rv == 1) + outl = signature->length; + /* Return value meanings: + * <=0: error. + * 1: method does everything. + * 2: carry on as normal. + * 3: ASN1 method sets algorithm identifiers: just sign. + */ + if (rv <= 0) + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ERR_R_EVP_LIB); + if (rv <= 1) + goto err; } + else + rv = 2; - if (type->flags & EVP_MD_FLAG_PKEY_METHOD_SIGNATURE) + if (rv == 2) { - if (!pkey->ameth || - !OBJ_find_sigid_by_algs(&signid, EVP_MD_nid(type), - pkey->ameth->pkey_id)) + if (type->flags & EVP_MD_FLAG_PKEY_METHOD_SIGNATURE) { - ASN1err(ASN1_F_ASN1_ITEM_SIGN, - ASN1_R_DIGEST_AND_KEY_TYPE_NOT_SUPPORTED); - return 0; + if (!pkey->ameth || + !OBJ_find_sigid_by_algs(&signid, + EVP_MD_nid(type), + pkey->ameth->pkey_id)) + { + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, + ASN1_R_DIGEST_AND_KEY_TYPE_NOT_SUPPORTED); + return 0; + } } - } - else - signid = type->pkey_type; + else + signid = type->pkey_type; - if (pkey->ameth->pkey_flags & ASN1_PKEY_SIGPARAM_NULL) - paramtype = V_ASN1_NULL; - else - paramtype = V_ASN1_UNDEF; + if (pkey->ameth->pkey_flags & ASN1_PKEY_SIGPARAM_NULL) + paramtype = V_ASN1_NULL; + else + paramtype = V_ASN1_UNDEF; - if (algor1) - X509_ALGOR_set0(algor1, OBJ_nid2obj(signid), paramtype, NULL); - if (algor2) - X509_ALGOR_set0(algor2, OBJ_nid2obj(signid), paramtype, NULL); + if (algor1) + X509_ALGOR_set0(algor1, OBJ_nid2obj(signid), paramtype, NULL); + if (algor2) + X509_ALGOR_set0(algor2, OBJ_nid2obj(signid), paramtype, NULL); + + } - EVP_MD_CTX_init(&ctx); inl=ASN1_item_i2d(asn,&buf_in, it); outll=outl=EVP_PKEY_size(pkey); - buf_out=(unsigned char *)OPENSSL_malloc((unsigned int)outl); + buf_out=OPENSSL_malloc((unsigned int)outl); if ((buf_in == NULL) || (buf_out == NULL)) { outl=0; - ASN1err(ASN1_F_ASN1_ITEM_SIGN,ERR_R_MALLOC_FAILURE); + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX,ERR_R_MALLOC_FAILURE); goto err; } - EVP_SignInit_ex(&ctx,type, NULL); - EVP_SignUpdate(&ctx,(unsigned char *)buf_in,inl); - if (!EVP_SignFinal(&ctx,(unsigned char *)buf_out, - (unsigned int *)&outl,pkey)) + if (!EVP_DigestSignUpdate(ctx, buf_in, inl) + || !EVP_DigestSignFinal(ctx, buf_out, &outl)) { outl=0; - ASN1err(ASN1_F_ASN1_ITEM_SIGN,ERR_R_EVP_LIB); + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX,ERR_R_EVP_LIB); goto err; } if (signature->data != NULL) OPENSSL_free(signature->data); @@ -289,7 +324,7 @@ int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, signature->flags&= ~(ASN1_STRING_FLAG_BITS_LEFT|0x07); signature->flags|=ASN1_STRING_FLAG_BITS_LEFT; err: - EVP_MD_CTX_cleanup(&ctx); + EVP_MD_CTX_cleanup(ctx); if (buf_in != NULL) { OPENSSL_cleanse((char *)buf_in,(unsigned int)inl); OPENSSL_free(buf_in); } if (buf_out != NULL) diff --git a/app/openssl/crypto/asn1/a_strex.c b/app/openssl/crypto/asn1/a_strex.c index 264ebf23..ead37ac3 100644 --- a/app/openssl/crypto/asn1/a_strex.c +++ b/app/openssl/crypto/asn1/a_strex.c @@ -567,6 +567,7 @@ int ASN1_STRING_to_UTF8(unsigned char **out, ASN1_STRING *in) if(mbflag == -1) return -1; mbflag |= MBSTRING_FLAG; stmp.data = NULL; + stmp.length = 0; ret = ASN1_mbstring_copy(&str, in->data, in->length, mbflag, B_ASN1_UTF8STRING); if(ret < 0) return ret; *out = stmp.data; diff --git a/app/openssl/crypto/asn1/a_verify.c b/app/openssl/crypto/asn1/a_verify.c index cecdb13c..fc84cd3d 100644 --- a/app/openssl/crypto/asn1/a_verify.c +++ b/app/openssl/crypto/asn1/a_verify.c @@ -101,8 +101,13 @@ int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *a, ASN1_BIT_STRING *signature, p=buf_in; i2d(data,&p); - EVP_VerifyInit_ex(&ctx,type, NULL); - EVP_VerifyUpdate(&ctx,(unsigned char *)buf_in,inl); + if (!EVP_VerifyInit_ex(&ctx,type, NULL) + || !EVP_VerifyUpdate(&ctx,(unsigned char *)buf_in,inl)) + { + ASN1err(ASN1_F_ASN1_VERIFY,ERR_R_EVP_LIB); + ret=0; + goto err; + } OPENSSL_cleanse(buf_in,(unsigned int)inl); OPENSSL_free(buf_in); @@ -126,16 +131,21 @@ err: #endif -int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, ASN1_BIT_STRING *signature, - void *asn, EVP_PKEY *pkey) +int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, + ASN1_BIT_STRING *signature, void *asn, EVP_PKEY *pkey) { EVP_MD_CTX ctx; - const EVP_MD *type = NULL; unsigned char *buf_in=NULL; int ret= -1,inl; int mdnid, pknid; + if (!pkey) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_PASSED_NULL_PARAMETER); + return -1; + } + EVP_MD_CTX_init(&ctx); /* Convert signature OID into digest and public key OIDs */ @@ -144,25 +154,47 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, ASN1_BIT_STRING *signat ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_UNKNOWN_SIGNATURE_ALGORITHM); goto err; } - type=EVP_get_digestbynid(mdnid); - if (type == NULL) + if (mdnid == NID_undef) { - ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM); - goto err; + if (!pkey->ameth || !pkey->ameth->item_verify) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_UNKNOWN_SIGNATURE_ALGORITHM); + goto err; + } + ret = pkey->ameth->item_verify(&ctx, it, asn, a, + signature, pkey); + /* Return value of 2 means carry on, anything else means we + * exit straight away: either a fatal error of the underlying + * verification routine handles all verification. + */ + if (ret != 2) + goto err; + ret = -1; } - - /* Check public key OID matches public key type */ - if (EVP_PKEY_type(pknid) != pkey->ameth->pkey_id) + else { - ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_WRONG_PUBLIC_KEY_TYPE); - goto err; - } + const EVP_MD *type; + type=EVP_get_digestbynid(mdnid); + if (type == NULL) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM); + goto err; + } + + /* Check public key OID matches public key type */ + if (EVP_PKEY_type(pknid) != pkey->ameth->pkey_id) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ASN1_R_WRONG_PUBLIC_KEY_TYPE); + goto err; + } + + if (!EVP_DigestVerifyInit(&ctx, NULL, type, NULL, pkey)) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ERR_R_EVP_LIB); + ret=0; + goto err; + } - if (!EVP_VerifyInit_ex(&ctx,type, NULL)) - { - ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ERR_R_EVP_LIB); - ret=0; - goto err; } inl = ASN1_item_i2d(asn, &buf_in, it); @@ -173,13 +205,18 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, ASN1_BIT_STRING *signat goto err; } - EVP_VerifyUpdate(&ctx,(unsigned char *)buf_in,inl); + if (!EVP_DigestVerifyUpdate(&ctx,buf_in,inl)) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ERR_R_EVP_LIB); + ret=0; + goto err; + } OPENSSL_cleanse(buf_in,(unsigned int)inl); OPENSSL_free(buf_in); - if (EVP_VerifyFinal(&ctx,(unsigned char *)signature->data, - (unsigned int)signature->length,pkey) <= 0) + if (EVP_DigestVerifyFinal(&ctx,signature->data, + (size_t)signature->length) <= 0) { ASN1err(ASN1_F_ASN1_ITEM_VERIFY,ERR_R_EVP_LIB); ret=0; diff --git a/app/openssl/crypto/asn1/ameth_lib.c b/app/openssl/crypto/asn1/ameth_lib.c index 5a581b90..a19e058f 100644 --- a/app/openssl/crypto/asn1/ameth_lib.c +++ b/app/openssl/crypto/asn1/ameth_lib.c @@ -69,6 +69,7 @@ extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[]; extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth; extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth; +extern const EVP_PKEY_ASN1_METHOD cmac_asn1_meth; /* Keep this sorted in type order !! */ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = @@ -90,7 +91,8 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = #ifndef OPENSSL_NO_EC &eckey_asn1_meth, #endif - &hmac_asn1_meth + &hmac_asn1_meth, + &cmac_asn1_meth }; typedef int sk_cmp_fn_type(const char * const *a, const char * const *b); @@ -291,6 +293,8 @@ EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags, if (!ameth) return NULL; + memset(ameth, 0, sizeof(EVP_PKEY_ASN1_METHOD)); + ameth->pkey_id = id; ameth->pkey_base_id = id; ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC; @@ -325,6 +329,9 @@ EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags, ameth->old_priv_encode = 0; ameth->old_priv_decode = 0; + ameth->item_verify = 0; + ameth->item_sign = 0; + ameth->pkey_size = 0; ameth->pkey_bits = 0; @@ -376,6 +383,9 @@ void EVP_PKEY_asn1_copy(EVP_PKEY_ASN1_METHOD *dst, dst->pkey_free = src->pkey_free; dst->pkey_ctrl = src->pkey_ctrl; + dst->item_sign = src->item_sign; + dst->item_verify = src->item_verify; + } void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth) diff --git a/app/openssl/crypto/asn1/asn1.h b/app/openssl/crypto/asn1/asn1.h index 59540e4e..220a0c8c 100644 --- a/app/openssl/crypto/asn1/asn1.h +++ b/app/openssl/crypto/asn1/asn1.h @@ -235,7 +235,7 @@ typedef struct asn1_object_st */ #define ASN1_STRING_FLAG_MSTRING 0x040 /* This is the base type that holds just about everything :-) */ -typedef struct asn1_string_st +struct asn1_string_st { int length; int type; @@ -245,7 +245,7 @@ typedef struct asn1_string_st * input data has a non-zero 'unused bits' value, it will be * handled correctly */ long flags; - } ASN1_STRING; + }; /* ASN1_ENCODING structure: this is used to save the received * encoding of an ASN1 type. This is useful to get round @@ -293,7 +293,6 @@ DECLARE_STACK_OF(ASN1_STRING_TABLE) * see asn1t.h */ typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE; -typedef struct ASN1_ITEM_st ASN1_ITEM; typedef struct ASN1_TLC_st ASN1_TLC; /* This is just an opaque pointer */ typedef struct ASN1_VALUE_st ASN1_VALUE; @@ -1194,6 +1193,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_F_ASN1_ITEM_I2D_FP 193 #define ASN1_F_ASN1_ITEM_PACK 198 #define ASN1_F_ASN1_ITEM_SIGN 195 +#define ASN1_F_ASN1_ITEM_SIGN_CTX 220 #define ASN1_F_ASN1_ITEM_UNPACK 199 #define ASN1_F_ASN1_ITEM_VERIFY 197 #define ASN1_F_ASN1_MBSTRING_NCOPY 122 @@ -1266,6 +1266,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_F_PKCS5_PBE2_SET_IV 167 #define ASN1_F_PKCS5_PBE_SET 202 #define ASN1_F_PKCS5_PBE_SET0_ALGOR 215 +#define ASN1_F_PKCS5_PBKDF2_SET 219 #define ASN1_F_SMIME_READ_ASN1 212 #define ASN1_F_SMIME_TEXT 213 #define ASN1_F_X509_CINF_NEW 168 @@ -1291,6 +1292,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_R_BOOLEAN_IS_WRONG_LENGTH 106 #define ASN1_R_BUFFER_TOO_SMALL 107 #define ASN1_R_CIPHER_HAS_NO_OBJECT_IDENTIFIER 108 +#define ASN1_R_CONTEXT_NOT_INITIALISED 217 #define ASN1_R_DATA_IS_WRONG 109 #define ASN1_R_DECODE_ERROR 110 #define ASN1_R_DECODING_ERROR 111 diff --git a/app/openssl/crypto/asn1/asn1_err.c b/app/openssl/crypto/asn1/asn1_err.c index 6e04d08f..aa60203b 100644 --- a/app/openssl/crypto/asn1/asn1_err.c +++ b/app/openssl/crypto/asn1/asn1_err.c @@ -1,6 +1,6 @@ /* crypto/asn1/asn1_err.c */ /* ==================================================================== - * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -107,6 +107,7 @@ static ERR_STRING_DATA ASN1_str_functs[]= {ERR_FUNC(ASN1_F_ASN1_ITEM_I2D_FP), "ASN1_item_i2d_fp"}, {ERR_FUNC(ASN1_F_ASN1_ITEM_PACK), "ASN1_item_pack"}, {ERR_FUNC(ASN1_F_ASN1_ITEM_SIGN), "ASN1_item_sign"}, +{ERR_FUNC(ASN1_F_ASN1_ITEM_SIGN_CTX), "ASN1_item_sign_ctx"}, {ERR_FUNC(ASN1_F_ASN1_ITEM_UNPACK), "ASN1_item_unpack"}, {ERR_FUNC(ASN1_F_ASN1_ITEM_VERIFY), "ASN1_item_verify"}, {ERR_FUNC(ASN1_F_ASN1_MBSTRING_NCOPY), "ASN1_mbstring_ncopy"}, @@ -179,6 +180,7 @@ static ERR_STRING_DATA ASN1_str_functs[]= {ERR_FUNC(ASN1_F_PKCS5_PBE2_SET_IV), "PKCS5_pbe2_set_iv"}, {ERR_FUNC(ASN1_F_PKCS5_PBE_SET), "PKCS5_pbe_set"}, {ERR_FUNC(ASN1_F_PKCS5_PBE_SET0_ALGOR), "PKCS5_pbe_set0_algor"}, +{ERR_FUNC(ASN1_F_PKCS5_PBKDF2_SET), "PKCS5_pbkdf2_set"}, {ERR_FUNC(ASN1_F_SMIME_READ_ASN1), "SMIME_read_ASN1"}, {ERR_FUNC(ASN1_F_SMIME_TEXT), "SMIME_text"}, {ERR_FUNC(ASN1_F_X509_CINF_NEW), "X509_CINF_NEW"}, @@ -207,6 +209,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]= {ERR_REASON(ASN1_R_BOOLEAN_IS_WRONG_LENGTH),"boolean is wrong length"}, {ERR_REASON(ASN1_R_BUFFER_TOO_SMALL) ,"buffer too small"}, {ERR_REASON(ASN1_R_CIPHER_HAS_NO_OBJECT_IDENTIFIER),"cipher has no object identifier"}, +{ERR_REASON(ASN1_R_CONTEXT_NOT_INITIALISED),"context not initialised"}, {ERR_REASON(ASN1_R_DATA_IS_WRONG) ,"data is wrong"}, {ERR_REASON(ASN1_R_DECODE_ERROR) ,"decode error"}, {ERR_REASON(ASN1_R_DECODING_ERROR) ,"decoding error"}, @@ -302,7 +305,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]= {ERR_REASON(ASN1_R_UNKNOWN_PUBLIC_KEY_TYPE),"unknown public key type"}, {ERR_REASON(ASN1_R_UNKNOWN_SIGNATURE_ALGORITHM),"unknown signature algorithm"}, {ERR_REASON(ASN1_R_UNKNOWN_TAG) ,"unknown tag"}, -{ERR_REASON(ASN1_R_UNKOWN_FORMAT) ,"unkown format"}, +{ERR_REASON(ASN1_R_UNKOWN_FORMAT) ,"unknown format"}, {ERR_REASON(ASN1_R_UNSUPPORTED_ANY_DEFINED_BY_TYPE),"unsupported any defined by type"}, {ERR_REASON(ASN1_R_UNSUPPORTED_CIPHER) ,"unsupported cipher"}, {ERR_REASON(ASN1_R_UNSUPPORTED_ENCRYPTION_ALGORITHM),"unsupported encryption algorithm"}, diff --git a/app/openssl/crypto/asn1/asn1_locl.h b/app/openssl/crypto/asn1/asn1_locl.h index 5aa65e28..9fcf0d95 100644 --- a/app/openssl/crypto/asn1/asn1_locl.h +++ b/app/openssl/crypto/asn1/asn1_locl.h @@ -102,6 +102,10 @@ struct evp_pkey_asn1_method_st int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b); int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent, ASN1_PCTX *pctx); + int (*sig_print)(BIO *out, + const X509_ALGOR *sigalg, const ASN1_STRING *sig, + int indent, ASN1_PCTX *pctx); + void (*pkey_free)(EVP_PKEY *pkey); int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2); @@ -111,6 +115,13 @@ struct evp_pkey_asn1_method_st int (*old_priv_decode)(EVP_PKEY *pkey, const unsigned char **pder, int derlen); int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder); + /* Custom ASN1 signature verification */ + int (*item_verify)(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *a, ASN1_BIT_STRING *sig, + EVP_PKEY *pkey); + int (*item_sign)(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *alg1, X509_ALGOR *alg2, + ASN1_BIT_STRING *sig); } /* EVP_PKEY_ASN1_METHOD */; diff --git a/app/openssl/crypto/asn1/asn_mime.c b/app/openssl/crypto/asn1/asn_mime.c index c1d1b122..54a704a9 100644 --- a/app/openssl/crypto/asn1/asn_mime.c +++ b/app/openssl/crypto/asn1/asn_mime.c @@ -377,8 +377,12 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags, BIO *tmpbio; const ASN1_AUX *aux = it->funcs; ASN1_STREAM_ARG sarg; + int rv = 1; - if (!(flags & SMIME_DETACHED)) + /* If data is not deteched or resigning then the output BIO is + * already set up to finalise when it is written through. + */ + if (!(flags & SMIME_DETACHED) || (flags & PKCS7_REUSE_DIGEST)) { SMIME_crlf_copy(data, out, flags); return 1; @@ -405,7 +409,7 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags, /* Finalize structure */ if (aux->asn1_cb(ASN1_OP_DETACHED_POST, &val, it, &sarg) <= 0) - return 0; + rv = 0; /* Now remove any digests prepended to the BIO */ @@ -416,7 +420,7 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags, sarg.ndef_bio = tmpbio; } - return 1; + return rv; } @@ -486,9 +490,9 @@ ASN1_VALUE *SMIME_read_ASN1(BIO *bio, BIO **bcont, const ASN1_ITEM *it) if(strcmp(hdr->value, "application/x-pkcs7-signature") && strcmp(hdr->value, "application/pkcs7-signature")) { - sk_MIME_HEADER_pop_free(headers, mime_hdr_free); ASN1err(ASN1_F_SMIME_READ_ASN1,ASN1_R_SIG_INVALID_MIME_TYPE); ERR_add_error_data(2, "type: ", hdr->value); + sk_MIME_HEADER_pop_free(headers, mime_hdr_free); sk_BIO_pop_free(parts, BIO_vfree); return NULL; } @@ -801,7 +805,7 @@ static MIME_HEADER *mime_hdr_new(char *name, char *value) if(name) { if(!(tmpname = BUF_strdup(name))) return NULL; for(p = tmpname ; *p; p++) { - c = *p; + c = (unsigned char)*p; if(isupper(c)) { c = tolower(c); *p = c; @@ -811,7 +815,7 @@ static MIME_HEADER *mime_hdr_new(char *name, char *value) if(value) { if(!(tmpval = BUF_strdup(value))) return NULL; for(p = tmpval ; *p; p++) { - c = *p; + c = (unsigned char)*p; if(isupper(c)) { c = tolower(c); *p = c; @@ -835,7 +839,7 @@ static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value) tmpname = BUF_strdup(name); if(!tmpname) return 0; for(p = tmpname ; *p; p++) { - c = *p; + c = (unsigned char)*p; if(isupper(c)) { c = tolower(c); *p = c; @@ -858,12 +862,17 @@ static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value) static int mime_hdr_cmp(const MIME_HEADER * const *a, const MIME_HEADER * const *b) { + if (!(*a)->name || !(*b)->name) + return !!(*a)->name - !!(*b)->name; + return(strcmp((*a)->name, (*b)->name)); } static int mime_param_cmp(const MIME_PARAM * const *a, const MIME_PARAM * const *b) { + if (!(*a)->param_name || !(*b)->param_name) + return !!(*a)->param_name - !!(*b)->param_name; return(strcmp((*a)->param_name, (*b)->param_name)); } diff --git a/app/openssl/crypto/asn1/n_pkey.c b/app/openssl/crypto/asn1/n_pkey.c index e7d04390..e2517399 100644 --- a/app/openssl/crypto/asn1/n_pkey.c +++ b/app/openssl/crypto/asn1/n_pkey.c @@ -129,6 +129,7 @@ int i2d_RSA_NET(const RSA *a, unsigned char **pp, unsigned char buf[256],*zz; unsigned char key[EVP_MAX_KEY_LENGTH]; EVP_CIPHER_CTX ctx; + EVP_CIPHER_CTX_init(&ctx); if (a == NULL) return(0); @@ -206,24 +207,28 @@ int i2d_RSA_NET(const RSA *a, unsigned char **pp, i = strlen((char *)buf); /* If the key is used for SGC the algorithm is modified a little. */ if(sgckey) { - EVP_Digest(buf, i, buf, NULL, EVP_md5(), NULL); + if (!EVP_Digest(buf, i, buf, NULL, EVP_md5(), NULL)) + goto err; memcpy(buf + 16, "SGCKEYSALT", 10); i = 26; } - EVP_BytesToKey(EVP_rc4(),EVP_md5(),NULL,buf,i,1,key,NULL); + if (!EVP_BytesToKey(EVP_rc4(),EVP_md5(),NULL,buf,i,1,key,NULL)) + goto err; OPENSSL_cleanse(buf,256); /* Encrypt private key in place */ zz = enckey->enckey->digest->data; - EVP_CIPHER_CTX_init(&ctx); - EVP_EncryptInit_ex(&ctx,EVP_rc4(),NULL,key,NULL); - EVP_EncryptUpdate(&ctx,zz,&i,zz,pkeylen); - EVP_EncryptFinal_ex(&ctx,zz + i,&j); - EVP_CIPHER_CTX_cleanup(&ctx); + if (!EVP_EncryptInit_ex(&ctx,EVP_rc4(),NULL,key,NULL)) + goto err; + if (!EVP_EncryptUpdate(&ctx,zz,&i,zz,pkeylen)) + goto err; + if (!EVP_EncryptFinal_ex(&ctx,zz + i,&j)) + goto err; ret = i2d_NETSCAPE_ENCRYPTED_PKEY(enckey, pp); err: + EVP_CIPHER_CTX_cleanup(&ctx); NETSCAPE_ENCRYPTED_PKEY_free(enckey); NETSCAPE_PKEY_free(pkey); return(ret); @@ -288,6 +293,7 @@ static RSA *d2i_RSA_NET_2(RSA **a, ASN1_OCTET_STRING *os, const unsigned char *zz; unsigned char key[EVP_MAX_KEY_LENGTH]; EVP_CIPHER_CTX ctx; + EVP_CIPHER_CTX_init(&ctx); i=cb((char *)buf,256,"Enter Private Key password:",0); if (i != 0) @@ -298,19 +304,22 @@ static RSA *d2i_RSA_NET_2(RSA **a, ASN1_OCTET_STRING *os, i = strlen((char *)buf); if(sgckey){ - EVP_Digest(buf, i, buf, NULL, EVP_md5(), NULL); + if (!EVP_Digest(buf, i, buf, NULL, EVP_md5(), NULL)) + goto err; memcpy(buf + 16, "SGCKEYSALT", 10); i = 26; } - EVP_BytesToKey(EVP_rc4(),EVP_md5(),NULL,buf,i,1,key,NULL); + if (!EVP_BytesToKey(EVP_rc4(),EVP_md5(),NULL,buf,i,1,key,NULL)) + goto err; OPENSSL_cleanse(buf,256); - EVP_CIPHER_CTX_init(&ctx); - EVP_DecryptInit_ex(&ctx,EVP_rc4(),NULL, key,NULL); - EVP_DecryptUpdate(&ctx,os->data,&i,os->data,os->length); - EVP_DecryptFinal_ex(&ctx,&(os->data[i]),&j); - EVP_CIPHER_CTX_cleanup(&ctx); + if (!EVP_DecryptInit_ex(&ctx,EVP_rc4(),NULL, key,NULL)) + goto err; + if (!EVP_DecryptUpdate(&ctx,os->data,&i,os->data,os->length)) + goto err; + if (!EVP_DecryptFinal_ex(&ctx,&(os->data[i]),&j)) + goto err; os->length=i+j; zz=os->data; @@ -328,6 +337,7 @@ static RSA *d2i_RSA_NET_2(RSA **a, ASN1_OCTET_STRING *os, goto err; } err: + EVP_CIPHER_CTX_cleanup(&ctx); NETSCAPE_PKEY_free(pkey); return(ret); } diff --git a/app/openssl/crypto/asn1/p5_pbev2.c b/app/openssl/crypto/asn1/p5_pbev2.c index cb49b665..4ea68303 100644 --- a/app/openssl/crypto/asn1/p5_pbev2.c +++ b/app/openssl/crypto/asn1/p5_pbev2.c @@ -91,12 +91,10 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, unsigned char *aiv, int prf_nid) { X509_ALGOR *scheme = NULL, *kalg = NULL, *ret = NULL; - int alg_nid; + int alg_nid, keylen; EVP_CIPHER_CTX ctx; unsigned char iv[EVP_MAX_IV_LENGTH]; - PBKDF2PARAM *kdf = NULL; PBE2PARAM *pbe2 = NULL; - ASN1_OCTET_STRING *osalt = NULL; ASN1_OBJECT *obj; alg_nid = EVP_CIPHER_type(cipher); @@ -127,7 +125,8 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, EVP_CIPHER_CTX_init(&ctx); /* Dummy cipherinit to just setup the IV, and PRF */ - EVP_CipherInit_ex(&ctx, cipher, NULL, NULL, iv, 0); + if (!EVP_CipherInit_ex(&ctx, cipher, NULL, NULL, iv, 0)) + goto err; if(EVP_CIPHER_param_to_asn1(&ctx, scheme->parameter) < 0) { ASN1err(ASN1_F_PKCS5_PBE2_SET_IV, ASN1_R_ERROR_SETTING_CIPHER_PARAMS); @@ -145,55 +144,21 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, } EVP_CIPHER_CTX_cleanup(&ctx); - if(!(kdf = PBKDF2PARAM_new())) goto merr; - if(!(osalt = M_ASN1_OCTET_STRING_new())) goto merr; - - if (!saltlen) saltlen = PKCS5_SALT_LEN; - if (!(osalt->data = OPENSSL_malloc (saltlen))) goto merr; - osalt->length = saltlen; - if (salt) memcpy (osalt->data, salt, saltlen); - else if (RAND_pseudo_bytes (osalt->data, saltlen) < 0) goto merr; - - if(iter <= 0) iter = PKCS5_DEFAULT_ITER; - if(!ASN1_INTEGER_set(kdf->iter, iter)) goto merr; - - /* Now include salt in kdf structure */ - kdf->salt->value.octet_string = osalt; - kdf->salt->type = V_ASN1_OCTET_STRING; - osalt = NULL; - /* If its RC2 then we'd better setup the key length */ - if(alg_nid == NID_rc2_cbc) { - if(!(kdf->keylength = M_ASN1_INTEGER_new())) goto merr; - if(!ASN1_INTEGER_set (kdf->keylength, - EVP_CIPHER_key_length(cipher))) goto merr; - } - - /* prf can stay NULL if we are using hmacWithSHA1 */ - if (prf_nid != NID_hmacWithSHA1) - { - kdf->prf = X509_ALGOR_new(); - if (!kdf->prf) - goto merr; - X509_ALGOR_set0(kdf->prf, OBJ_nid2obj(prf_nid), - V_ASN1_NULL, NULL); - } - - /* Now setup the PBE2PARAM keyfunc structure */ + if(alg_nid == NID_rc2_cbc) + keylen = EVP_CIPHER_key_length(cipher); + else + keylen = -1; - pbe2->keyfunc->algorithm = OBJ_nid2obj(NID_id_pbkdf2); + /* Setup keyfunc */ - /* Encode PBKDF2PARAM into parameter of pbe2 */ + X509_ALGOR_free(pbe2->keyfunc); - if(!(pbe2->keyfunc->parameter = ASN1_TYPE_new())) goto merr; + pbe2->keyfunc = PKCS5_pbkdf2_set(iter, salt, saltlen, prf_nid, keylen); - if(!ASN1_item_pack(kdf, ASN1_ITEM_rptr(PBKDF2PARAM), - &pbe2->keyfunc->parameter->value.sequence)) goto merr; - pbe2->keyfunc->parameter->type = V_ASN1_SEQUENCE; - - PBKDF2PARAM_free(kdf); - kdf = NULL; + if (!pbe2->keyfunc) + goto merr; /* Now set up top level AlgorithmIdentifier */ @@ -219,8 +184,6 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, err: PBE2PARAM_free(pbe2); /* Note 'scheme' is freed as part of pbe2 */ - M_ASN1_OCTET_STRING_free(osalt); - PBKDF2PARAM_free(kdf); X509_ALGOR_free(kalg); X509_ALGOR_free(ret); @@ -233,3 +196,85 @@ X509_ALGOR *PKCS5_pbe2_set(const EVP_CIPHER *cipher, int iter, { return PKCS5_pbe2_set_iv(cipher, iter, salt, saltlen, NULL, -1); } + +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen) + { + X509_ALGOR *keyfunc = NULL; + PBKDF2PARAM *kdf = NULL; + ASN1_OCTET_STRING *osalt = NULL; + + if(!(kdf = PBKDF2PARAM_new())) + goto merr; + if(!(osalt = M_ASN1_OCTET_STRING_new())) + goto merr; + + kdf->salt->value.octet_string = osalt; + kdf->salt->type = V_ASN1_OCTET_STRING; + + if (!saltlen) + saltlen = PKCS5_SALT_LEN; + if (!(osalt->data = OPENSSL_malloc (saltlen))) + goto merr; + + osalt->length = saltlen; + + if (salt) + memcpy (osalt->data, salt, saltlen); + else if (RAND_pseudo_bytes (osalt->data, saltlen) < 0) + goto merr; + + if(iter <= 0) + iter = PKCS5_DEFAULT_ITER; + + if(!ASN1_INTEGER_set(kdf->iter, iter)) + goto merr; + + /* If have a key len set it up */ + + if(keylen > 0) + { + if(!(kdf->keylength = M_ASN1_INTEGER_new())) + goto merr; + if(!ASN1_INTEGER_set (kdf->keylength, keylen)) + goto merr; + } + + /* prf can stay NULL if we are using hmacWithSHA1 */ + if (prf_nid > 0 && prf_nid != NID_hmacWithSHA1) + { + kdf->prf = X509_ALGOR_new(); + if (!kdf->prf) + goto merr; + X509_ALGOR_set0(kdf->prf, OBJ_nid2obj(prf_nid), + V_ASN1_NULL, NULL); + } + + /* Finally setup the keyfunc structure */ + + keyfunc = X509_ALGOR_new(); + if (!keyfunc) + goto merr; + + keyfunc->algorithm = OBJ_nid2obj(NID_id_pbkdf2); + + /* Encode PBKDF2PARAM into parameter of pbe2 */ + + if(!(keyfunc->parameter = ASN1_TYPE_new())) + goto merr; + + if(!ASN1_item_pack(kdf, ASN1_ITEM_rptr(PBKDF2PARAM), + &keyfunc->parameter->value.sequence)) + goto merr; + keyfunc->parameter->type = V_ASN1_SEQUENCE; + + PBKDF2PARAM_free(kdf); + return keyfunc; + + merr: + ASN1err(ASN1_F_PKCS5_PBKDF2_SET,ERR_R_MALLOC_FAILURE); + PBKDF2PARAM_free(kdf); + X509_ALGOR_free(keyfunc); + return NULL; + } + diff --git a/app/openssl/crypto/asn1/t_crl.c b/app/openssl/crypto/asn1/t_crl.c index ee5a687c..c6116920 100644 --- a/app/openssl/crypto/asn1/t_crl.c +++ b/app/openssl/crypto/asn1/t_crl.c @@ -94,8 +94,7 @@ int X509_CRL_print(BIO *out, X509_CRL *x) l = X509_CRL_get_version(x); BIO_printf(out, "%8sVersion %lu (0x%lx)\n", "", l+1, l); i = OBJ_obj2nid(x->sig_alg->algorithm); - BIO_printf(out, "%8sSignature Algorithm: %s\n", "", - (i == NID_undef) ? "NONE" : OBJ_nid2ln(i)); + X509_signature_print(out, x->sig_alg, NULL); p=X509_NAME_oneline(X509_CRL_get_issuer(x),NULL,0); BIO_printf(out,"%8sIssuer: %s\n","",p); OPENSSL_free(p); diff --git a/app/openssl/crypto/asn1/t_x509.c b/app/openssl/crypto/asn1/t_x509.c index e061f2ff..edbb39a0 100644 --- a/app/openssl/crypto/asn1/t_x509.c +++ b/app/openssl/crypto/asn1/t_x509.c @@ -72,6 +72,7 @@ #include #include #include +#include "asn1_locl.h" #ifndef OPENSSL_NO_FP_API int X509_print_fp(FILE *fp, X509 *x) @@ -137,10 +138,10 @@ int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflags, unsigned long cflag) if (BIO_write(bp," Serial Number:",22) <= 0) goto err; bs=X509_get_serialNumber(x); - if (bs->length <= 4) + if (bs->length <= (int)sizeof(long)) { l=ASN1_INTEGER_get(bs); - if (l < 0) + if (bs->type == V_ASN1_NEG_INTEGER) { l= -l; neg="-"; @@ -167,12 +168,16 @@ int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflags, unsigned long cflag) if(!(cflag & X509_FLAG_NO_SIGNAME)) { + if(X509_signature_print(bp, x->sig_alg, NULL) <= 0) + goto err; +#if 0 if (BIO_printf(bp,"%8sSignature Algorithm: ","") <= 0) goto err; if (i2a_ASN1_OBJECT(bp, ci->signature->algorithm) <= 0) goto err; if (BIO_puts(bp, "\n") <= 0) goto err; +#endif } if(!(cflag & X509_FLAG_NO_ISSUER)) @@ -255,7 +260,8 @@ int X509_ocspid_print (BIO *bp, X509 *x) goto err; i2d_X509_NAME(x->cert_info->subject, &dertmp); - EVP_Digest(der, derlen, SHA1md, NULL, EVP_sha1(), NULL); + if (!EVP_Digest(der, derlen, SHA1md, NULL, EVP_sha1(), NULL)) + goto err; for (i=0; i < SHA_DIGEST_LENGTH; i++) { if (BIO_printf(bp,"%02X",SHA1md[i]) <= 0) goto err; @@ -268,8 +274,10 @@ int X509_ocspid_print (BIO *bp, X509 *x) if (BIO_printf(bp,"\n Public key OCSP hash: ") <= 0) goto err; - EVP_Digest(x->cert_info->key->public_key->data, - x->cert_info->key->public_key->length, SHA1md, NULL, EVP_sha1(), NULL); + if (!EVP_Digest(x->cert_info->key->public_key->data, + x->cert_info->key->public_key->length, + SHA1md, NULL, EVP_sha1(), NULL)) + goto err; for (i=0; i < SHA_DIGEST_LENGTH; i++) { if (BIO_printf(bp,"%02X",SHA1md[i]) <= 0) @@ -283,23 +291,50 @@ err: return(0); } -int X509_signature_print(BIO *bp, X509_ALGOR *sigalg, ASN1_STRING *sig) +int X509_signature_dump(BIO *bp, const ASN1_STRING *sig, int indent) { - unsigned char *s; + const unsigned char *s; int i, n; - if (BIO_puts(bp," Signature Algorithm: ") <= 0) return 0; - if (i2a_ASN1_OBJECT(bp, sigalg->algorithm) <= 0) return 0; n=sig->length; s=sig->data; for (i=0; ialgorithm) <= 0) return 0; + + sig_nid = OBJ_obj2nid(sigalg->algorithm); + if (sig_nid != NID_undef) + { + int pkey_nid, dig_nid; + const EVP_PKEY_ASN1_METHOD *ameth; + if (OBJ_find_sigid_algs(sig_nid, &dig_nid, &pkey_nid)) + { + ameth = EVP_PKEY_asn1_find(NULL, pkey_nid); + if (ameth && ameth->sig_print) + return ameth->sig_print(bp, sigalg, sig, 9, 0); + } + } + if (sig) + return X509_signature_dump(bp, sig, 9); + else if (BIO_puts(bp, "\n") <= 0) + return 0; return 1; } diff --git a/app/openssl/crypto/asn1/tasn_prn.c b/app/openssl/crypto/asn1/tasn_prn.c index 45369801..542a091a 100644 --- a/app/openssl/crypto/asn1/tasn_prn.c +++ b/app/openssl/crypto/asn1/tasn_prn.c @@ -446,11 +446,11 @@ static int asn1_print_fsname(BIO *out, int indent, return 1; } -static int asn1_print_boolean_ctx(BIO *out, const int bool, +static int asn1_print_boolean_ctx(BIO *out, int boolval, const ASN1_PCTX *pctx) { const char *str; - switch (bool) + switch (boolval) { case -1: str = "BOOL ABSENT"; @@ -574,10 +574,10 @@ static int asn1_primitive_print(BIO *out, ASN1_VALUE **fld, { case V_ASN1_BOOLEAN: { - int bool = *(int *)fld; - if (bool == -1) - bool = it->size; - ret = asn1_print_boolean_ctx(out, bool, pctx); + int boolval = *(int *)fld; + if (boolval == -1) + boolval = it->size; + ret = asn1_print_boolean_ctx(out, boolval, pctx); } break; diff --git a/app/openssl/crypto/asn1/x_algor.c b/app/openssl/crypto/asn1/x_algor.c index 99e53429..274e456c 100644 --- a/app/openssl/crypto/asn1/x_algor.c +++ b/app/openssl/crypto/asn1/x_algor.c @@ -128,3 +128,17 @@ void X509_ALGOR_get0(ASN1_OBJECT **paobj, int *pptype, void **ppval, } } +/* Set up an X509_ALGOR DigestAlgorithmIdentifier from an EVP_MD */ + +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md) + { + int param_type; + + if (md->flags & EVP_MD_FLAG_DIGALGID_ABSENT) + param_type = V_ASN1_UNDEF; + else + param_type = V_ASN1_NULL; + + X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); + + } diff --git a/app/openssl/crypto/asn1/x_name.c b/app/openssl/crypto/asn1/x_name.c index 49be08b4..d7c23186 100644 --- a/app/openssl/crypto/asn1/x_name.c +++ b/app/openssl/crypto/asn1/x_name.c @@ -399,8 +399,7 @@ static int asn1_string_canon(ASN1_STRING *out, ASN1_STRING *in) /* If type not in bitmask just copy string across */ if (!(ASN1_tag2bit(in->type) & ASN1_MASK_CANON)) { - out->type = in->type; - if (!ASN1_STRING_set(out, in->data, in->length)) + if (!ASN1_STRING_copy(out, in)) return 0; return 1; } diff --git a/app/openssl/crypto/asn1/x_pubkey.c b/app/openssl/crypto/asn1/x_pubkey.c index d42b6a2c..b649e1fc 100644 --- a/app/openssl/crypto/asn1/x_pubkey.c +++ b/app/openssl/crypto/asn1/x_pubkey.c @@ -171,7 +171,19 @@ EVP_PKEY *X509_PUBKEY_get(X509_PUBKEY *key) goto error; } - key->pkey = ret; + /* Check to see if another thread set key->pkey first */ + CRYPTO_w_lock(CRYPTO_LOCK_EVP_PKEY); + if (key->pkey) + { + CRYPTO_w_unlock(CRYPTO_LOCK_EVP_PKEY); + EVP_PKEY_free(ret); + ret = key->pkey; + } + else + { + key->pkey = ret; + CRYPTO_w_unlock(CRYPTO_LOCK_EVP_PKEY); + } CRYPTO_add(&ret->references, 1, CRYPTO_LOCK_EVP_PKEY); return ret; diff --git a/app/openssl/crypto/bf/asm/bf-586.S b/app/openssl/crypto/bf/asm/bf-586.S new file mode 100644 index 00000000..aa718d40 --- /dev/null +++ b/app/openssl/crypto/bf/asm/bf-586.S @@ -0,0 +1,896 @@ +.file "bf-586.s" +.text +.globl BF_encrypt +.type BF_encrypt,@function +.align 16 +BF_encrypt: +.L_BF_encrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl (%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 4(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 8(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 12(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 16(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 20(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 24(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 28(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 32(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 36(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 40(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 44(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 48(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 52(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 56(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 60(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 64(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl 68(%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_encrypt,.-.L_BF_encrypt_begin +.globl BF_decrypt +.type BF_decrypt,@function +.align 16 +BF_decrypt: +.L_BF_decrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl 68(%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 64(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 60(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 56(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 52(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 48(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 44(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 40(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 36(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 32(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 28(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 24(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 20(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 16(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 12(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 8(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 4(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl (%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_decrypt,.-.L_BF_decrypt_begin +.globl BF_cbc_encrypt +.type BF_cbc_encrypt,@function +.align 16 +BF_cbc_encrypt: +.L_BF_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + movl 48(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L000decrypt + andl $4294967288,%ebp + movl 8(%esp),%eax + movl 12(%esp),%ebx + jz .L001encrypt_finish +.L002encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L002encrypt_loop +.L001encrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + call .L004PIC_point +.L004PIC_point: + popl %edx + leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L006ej7: + movb 6(%esi),%dh + shll $8,%edx +.L007ej6: + movb 5(%esi),%dh +.L008ej5: + movb 4(%esi),%dl +.L009ej4: + movl (%esi),%ecx + jmp .L010ejend +.L011ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L012ej2: + movb 1(%esi),%ch +.L013ej1: + movb (%esi),%cl +.L010ejend: + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L003finish +.L000decrypt: + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L014decrypt_finish +.L015decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,16(%esp) + movl %ebx,20(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L015decrypt_loop +.L014decrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L016dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L017dj6: + movb %dh,5(%edi) +.L018dj5: + movb %dl,4(%edi) +.L019dj4: + movl %ecx,(%edi) + jmp .L020djend +.L021dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L022dj2: + movb %ch,1(%esi) +.L023dj1: + movb %cl,(%esi) +.L020djend: + jmp .L003finish +.L003finish: + movl 60(%esp),%ecx + addl $24,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L005cbc_enc_jmp_table: +.long 0 +.long .L013ej1-.L004PIC_point +.long .L012ej2-.L004PIC_point +.long .L011ej3-.L004PIC_point +.long .L009ej4-.L004PIC_point +.long .L008ej5-.L004PIC_point +.long .L007ej6-.L004PIC_point +.long .L006ej7-.L004PIC_point +.align 64 +.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin diff --git a/app/openssl/crypto/bf/bf_skey.c b/app/openssl/crypto/bf/bf_skey.c index 3673cdee..3b0bca41 100644 --- a/app/openssl/crypto/bf/bf_skey.c +++ b/app/openssl/crypto/bf/bf_skey.c @@ -58,11 +58,19 @@ #include #include +#include #include #include "bf_locl.h" #include "bf_pi.h" void BF_set_key(BF_KEY *key, int len, const unsigned char *data) +#ifdef OPENSSL_FIPS + { + fips_cipher_abort(BLOWFISH); + private_BF_set_key(key, len, data); + } +void private_BF_set_key(BF_KEY *key, int len, const unsigned char *data) +#endif { int i; BF_LONG *p,ri,in[2]; diff --git a/app/openssl/crypto/bf/blowfish.h b/app/openssl/crypto/bf/blowfish.h index b97e76f9..4b6c8920 100644 --- a/app/openssl/crypto/bf/blowfish.h +++ b/app/openssl/crypto/bf/blowfish.h @@ -104,7 +104,9 @@ typedef struct bf_key_st BF_LONG S[4*256]; } BF_KEY; - +#ifdef OPENSSL_FIPS +void private_BF_set_key(BF_KEY *key, int len, const unsigned char *data); +#endif void BF_set_key(BF_KEY *key, int len, const unsigned char *data); void BF_encrypt(BF_LONG *data,const BF_KEY *key); diff --git a/app/openssl/crypto/bio/b_sock.c b/app/openssl/crypto/bio/b_sock.c index d47310d6..470b1a00 100644 --- a/app/openssl/crypto/bio/b_sock.c +++ b/app/openssl/crypto/bio/b_sock.c @@ -629,7 +629,8 @@ int BIO_get_accept_socket(char *host, int bind_mode) struct sockaddr_in6 sa_in6; #endif } server,client; - int s=INVALID_SOCKET,cs,addrlen; + int s=INVALID_SOCKET,cs; + socklen_t addrlen; unsigned char ip[4]; unsigned short port; char *str=NULL,*e; @@ -704,10 +705,10 @@ int BIO_get_accept_socket(char *host, int bind_mode) if ((*p_getaddrinfo.f)(h,p,&hint,&res)) break; - addrlen = res->ai_addrlen<=sizeof(server) ? + addrlen = res->ai_addrlen <= (socklen_t)sizeof(server) ? res->ai_addrlen : - sizeof(server); - memcpy(&server, res->ai_addr, addrlen); + (socklen_t)sizeof(server); + memcpy(&server, res->ai_addr, (size_t)addrlen); (*p_freeaddrinfo.f)(res); goto again; @@ -719,7 +720,7 @@ int BIO_get_accept_socket(char *host, int bind_mode) memset((char *)&server,0,sizeof(server)); server.sa_in.sin_family=AF_INET; server.sa_in.sin_port=htons(port); - addrlen = sizeof(server.sa_in); + addrlen = (socklen_t)sizeof(server.sa_in); if (h == NULL || strcmp(h,"*") == 0) server.sa_in.sin_addr.s_addr=INADDR_ANY; @@ -960,7 +961,6 @@ int BIO_set_tcp_ndelay(int s, int on) #endif return(ret == 0); } -#endif int BIO_socket_nbio(int s, int mode) { @@ -973,3 +973,4 @@ int BIO_socket_nbio(int s, int mode) #endif return(ret == 0); } +#endif diff --git a/app/openssl/crypto/bio/bf_buff.c b/app/openssl/crypto/bio/bf_buff.c index c1fd75aa..4b5a132d 100644 --- a/app/openssl/crypto/bio/bf_buff.c +++ b/app/openssl/crypto/bio/bf_buff.c @@ -209,7 +209,7 @@ start: /* add to buffer and return */ if (i >= inl) { - memcpy(&(ctx->obuf[ctx->obuf_len]),in,inl); + memcpy(&(ctx->obuf[ctx->obuf_off+ctx->obuf_len]),in,inl); ctx->obuf_len+=inl; return(num+inl); } @@ -219,7 +219,7 @@ start: { if (i > 0) /* lets fill it up if we can */ { - memcpy(&(ctx->obuf[ctx->obuf_len]),in,i); + memcpy(&(ctx->obuf[ctx->obuf_off+ctx->obuf_len]),in,i); in+=i; inl-=i; num+=i; @@ -294,9 +294,9 @@ static long buffer_ctrl(BIO *b, int cmd, long num, void *ptr) case BIO_C_GET_BUFF_NUM_LINES: ret=0; p1=ctx->ibuf; - for (i=ctx->ibuf_off; iibuf_len; i++) + for (i=0; iibuf_len; i++) { - if (p1[i] == '\n') ret++; + if (p1[ctx->ibuf_off + i] == '\n') ret++; } break; case BIO_CTRL_WPENDING: @@ -399,17 +399,18 @@ static long buffer_ctrl(BIO *b, int cmd, long num, void *ptr) for (;;) { BIO_clear_retry_flags(b); - if (ctx->obuf_len > ctx->obuf_off) + if (ctx->obuf_len > 0) { r=BIO_write(b->next_bio, &(ctx->obuf[ctx->obuf_off]), - ctx->obuf_len-ctx->obuf_off); + ctx->obuf_len); #if 0 -fprintf(stderr,"FLUSH [%3d] %3d -> %3d\n",ctx->obuf_off,ctx->obuf_len-ctx->obuf_off,r); +fprintf(stderr,"FLUSH [%3d] %3d -> %3d\n",ctx->obuf_off,ctx->obuf_len,r); #endif BIO_copy_next_retry(b); if (r <= 0) return((long)r); ctx->obuf_off+=r; + ctx->obuf_len-=r; } else { diff --git a/app/openssl/crypto/bio/bio.h b/app/openssl/crypto/bio/bio.h index 152802fb..05699ab2 100644 --- a/app/openssl/crypto/bio/bio.h +++ b/app/openssl/crypto/bio/bio.h @@ -68,6 +68,14 @@ #include +#ifndef OPENSSL_NO_SCTP +# ifndef OPENSSL_SYS_VMS +# include +# else +# include +# endif +#endif + #ifdef __cplusplus extern "C" { #endif @@ -95,6 +103,9 @@ extern "C" { #define BIO_TYPE_BIO (19|0x0400) /* (half a) BIO pair */ #define BIO_TYPE_LINEBUFFER (20|0x0200) /* filter */ #define BIO_TYPE_DGRAM (21|0x0400|0x0100) +#ifndef OPENSSL_NO_SCTP +#define BIO_TYPE_DGRAM_SCTP (24|0x0400|0x0100) +#endif #define BIO_TYPE_ASN1 (22|0x0200) /* filter */ #define BIO_TYPE_COMP (23|0x0200) /* filter */ @@ -146,6 +157,7 @@ extern "C" { /* #endif */ #define BIO_CTRL_DGRAM_QUERY_MTU 40 /* as kernel for current MTU */ +#define BIO_CTRL_DGRAM_GET_FALLBACK_MTU 47 #define BIO_CTRL_DGRAM_GET_MTU 41 /* get cached value for MTU */ #define BIO_CTRL_DGRAM_SET_MTU 42 /* set cached value for * MTU. want to use this @@ -161,7 +173,22 @@ extern "C" { #define BIO_CTRL_DGRAM_SET_PEER 44 /* Destination for the data */ #define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45 /* Next DTLS handshake timeout to - * adjust socket timeouts */ + * adjust socket timeouts */ + +#ifndef OPENSSL_NO_SCTP +/* SCTP stuff */ +#define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE 50 +#define BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY 51 +#define BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY 52 +#define BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD 53 +#define BIO_CTRL_DGRAM_SCTP_GET_SNDINFO 60 +#define BIO_CTRL_DGRAM_SCTP_SET_SNDINFO 61 +#define BIO_CTRL_DGRAM_SCTP_GET_RCVINFO 62 +#define BIO_CTRL_DGRAM_SCTP_SET_RCVINFO 63 +#define BIO_CTRL_DGRAM_SCTP_GET_PRINFO 64 +#define BIO_CTRL_DGRAM_SCTP_SET_PRINFO 65 +#define BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN 70 +#endif /* modifiers */ #define BIO_FP_READ 0x02 @@ -306,6 +333,15 @@ DECLARE_STACK_OF(BIO) typedef struct bio_f_buffer_ctx_struct { + /* Buffers are setup like this: + * + * <---------------------- size -----------------------> + * +---------------------------------------------------+ + * | consumed | remaining | free space | + * +---------------------------------------------------+ + * <-- off --><------- len -------> + */ + /* BIO *bio; */ /* this is now in the BIO struct */ int ibuf_size; /* how big is the input buffer */ int obuf_size; /* how big is the output buffer */ @@ -322,6 +358,34 @@ typedef struct bio_f_buffer_ctx_struct /* Prefix and suffix callback in ASN1 BIO */ typedef int asn1_ps_func(BIO *b, unsigned char **pbuf, int *plen, void *parg); +#ifndef OPENSSL_NO_SCTP +/* SCTP parameter structs */ +struct bio_dgram_sctp_sndinfo + { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; + }; + +struct bio_dgram_sctp_rcvinfo + { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; + }; + +struct bio_dgram_sctp_prinfo + { + uint16_t pr_policy; + uint32_t pr_value; + }; +#endif + /* connect BIO stuff */ #define BIO_CONN_S_BEFORE 1 #define BIO_CONN_S_GET_IP 2 @@ -619,6 +683,9 @@ BIO_METHOD *BIO_f_linebuffer(void); BIO_METHOD *BIO_f_nbio_test(void); #ifndef OPENSSL_NO_DGRAM BIO_METHOD *BIO_s_datagram(void); +#ifndef OPENSSL_NO_SCTP +BIO_METHOD *BIO_s_datagram_sctp(void); +#endif #endif /* BIO_METHOD *BIO_f_ber(void); */ @@ -661,6 +728,15 @@ int BIO_set_tcp_ndelay(int sock,int turn_on); BIO *BIO_new_socket(int sock, int close_flag); BIO *BIO_new_dgram(int fd, int close_flag); +#ifndef OPENSSL_NO_SCTP +BIO *BIO_new_dgram_sctp(int fd, int close_flag); +int BIO_dgram_is_sctp(BIO *bio); +int BIO_dgram_sctp_notification_cb(BIO *b, + void (*handle_notifications)(BIO *bio, void *context, void *buf), + void *context); +int BIO_dgram_sctp_wait_for_dry(BIO *b); +int BIO_dgram_sctp_msg_waiting(BIO *b); +#endif BIO *BIO_new_fd(int fd, int close_flag); BIO *BIO_new_connect(char *host_port); BIO *BIO_new_accept(char *host_port); @@ -725,6 +801,7 @@ void ERR_load_BIO_strings(void); #define BIO_F_BUFFER_CTRL 114 #define BIO_F_CONN_CTRL 127 #define BIO_F_CONN_STATE 115 +#define BIO_F_DGRAM_SCTP_READ 132 #define BIO_F_FILE_CTRL 116 #define BIO_F_FILE_READ 130 #define BIO_F_LINEBUFFER_CTRL 129 diff --git a/app/openssl/crypto/bio/bio_err.c b/app/openssl/crypto/bio/bio_err.c index a224edd5..0dbfbd80 100644 --- a/app/openssl/crypto/bio/bio_err.c +++ b/app/openssl/crypto/bio/bio_err.c @@ -1,6 +1,6 @@ /* crypto/bio/bio_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -95,6 +95,7 @@ static ERR_STRING_DATA BIO_str_functs[]= {ERR_FUNC(BIO_F_BUFFER_CTRL), "BUFFER_CTRL"}, {ERR_FUNC(BIO_F_CONN_CTRL), "CONN_CTRL"}, {ERR_FUNC(BIO_F_CONN_STATE), "CONN_STATE"}, +{ERR_FUNC(BIO_F_DGRAM_SCTP_READ), "DGRAM_SCTP_READ"}, {ERR_FUNC(BIO_F_FILE_CTRL), "FILE_CTRL"}, {ERR_FUNC(BIO_F_FILE_READ), "FILE_READ"}, {ERR_FUNC(BIO_F_LINEBUFFER_CTRL), "LINEBUFFER_CTRL"}, diff --git a/app/openssl/crypto/bio/bio_lib.c b/app/openssl/crypto/bio/bio_lib.c index e12bc3a2..9c9646af 100644 --- a/app/openssl/crypto/bio/bio_lib.c +++ b/app/openssl/crypto/bio/bio_lib.c @@ -521,40 +521,40 @@ void BIO_free_all(BIO *bio) BIO *BIO_dup_chain(BIO *in) { - BIO *ret=NULL,*eoc=NULL,*bio,*new; + BIO *ret=NULL,*eoc=NULL,*bio,*new_bio; for (bio=in; bio != NULL; bio=bio->next_bio) { - if ((new=BIO_new(bio->method)) == NULL) goto err; - new->callback=bio->callback; - new->cb_arg=bio->cb_arg; - new->init=bio->init; - new->shutdown=bio->shutdown; - new->flags=bio->flags; + if ((new_bio=BIO_new(bio->method)) == NULL) goto err; + new_bio->callback=bio->callback; + new_bio->cb_arg=bio->cb_arg; + new_bio->init=bio->init; + new_bio->shutdown=bio->shutdown; + new_bio->flags=bio->flags; /* This will let SSL_s_sock() work with stdin/stdout */ - new->num=bio->num; + new_bio->num=bio->num; - if (!BIO_dup_state(bio,(char *)new)) + if (!BIO_dup_state(bio,(char *)new_bio)) { - BIO_free(new); + BIO_free(new_bio); goto err; } /* copy app data */ - if (!CRYPTO_dup_ex_data(CRYPTO_EX_INDEX_BIO, &new->ex_data, + if (!CRYPTO_dup_ex_data(CRYPTO_EX_INDEX_BIO, &new_bio->ex_data, &bio->ex_data)) goto err; if (ret == NULL) { - eoc=new; + eoc=new_bio; ret=eoc; } else { - BIO_push(eoc,new); - eoc=new; + BIO_push(eoc,new_bio); + eoc=new_bio; } } return(ret); diff --git a/app/openssl/crypto/bio/bss_bio.c b/app/openssl/crypto/bio/bss_bio.c index 76bd48e7..52ef0ebc 100644 --- a/app/openssl/crypto/bio/bss_bio.c +++ b/app/openssl/crypto/bio/bss_bio.c @@ -277,10 +277,10 @@ static int bio_read(BIO *bio, char *buf, int size_) */ /* WARNING: The non-copying interface is largely untested as of yet * and may contain bugs. */ -static ssize_t bio_nread0(BIO *bio, char **buf) +static ossl_ssize_t bio_nread0(BIO *bio, char **buf) { struct bio_bio_st *b, *peer_b; - ssize_t num; + ossl_ssize_t num; BIO_clear_retry_flags(bio); @@ -315,15 +315,15 @@ static ssize_t bio_nread0(BIO *bio, char **buf) return num; } -static ssize_t bio_nread(BIO *bio, char **buf, size_t num_) +static ossl_ssize_t bio_nread(BIO *bio, char **buf, size_t num_) { struct bio_bio_st *b, *peer_b; - ssize_t num, available; + ossl_ssize_t num, available; if (num_ > SSIZE_MAX) num = SSIZE_MAX; else - num = (ssize_t)num_; + num = (ossl_ssize_t)num_; available = bio_nread0(bio, buf); if (num > available) @@ -428,7 +428,7 @@ static int bio_write(BIO *bio, const char *buf, int num_) * (example usage: bio_nwrite0(), write to buffer, bio_nwrite() * or just bio_nwrite(), write to buffer) */ -static ssize_t bio_nwrite0(BIO *bio, char **buf) +static ossl_ssize_t bio_nwrite0(BIO *bio, char **buf) { struct bio_bio_st *b; size_t num; @@ -476,15 +476,15 @@ static ssize_t bio_nwrite0(BIO *bio, char **buf) return num; } -static ssize_t bio_nwrite(BIO *bio, char **buf, size_t num_) +static ossl_ssize_t bio_nwrite(BIO *bio, char **buf, size_t num_) { struct bio_bio_st *b; - ssize_t num, space; + ossl_ssize_t num, space; if (num_ > SSIZE_MAX) num = SSIZE_MAX; else - num = (ssize_t)num_; + num = (ossl_ssize_t)num_; space = bio_nwrite0(bio, buf); if (num > space) diff --git a/app/openssl/crypto/bio/bss_dgram.c b/app/openssl/crypto/bio/bss_dgram.c index 71ebe987..54c012c4 100644 --- a/app/openssl/crypto/bio/bss_dgram.c +++ b/app/openssl/crypto/bio/bss_dgram.c @@ -70,10 +70,27 @@ #include #endif -#ifdef OPENSSL_SYS_LINUX +#ifndef OPENSSL_NO_SCTP +#include +#include +#define OPENSSL_SCTP_DATA_CHUNK_TYPE 0x00 +#define OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE 0xc0 +#endif + +#if defined(OPENSSL_SYS_LINUX) && !defined(IP_MTU) #define IP_MTU 14 /* linux is lame */ #endif +#if defined(__FreeBSD__) && defined(IN6_IS_ADDR_V4MAPPED) +/* Standard definition causes type-punning problems. */ +#undef IN6_IS_ADDR_V4MAPPED +#define s6_addr32 __u6_addr.__u6_addr32 +#define IN6_IS_ADDR_V4MAPPED(a) \ + (((a)->s6_addr32[0] == 0) && \ + ((a)->s6_addr32[1] == 0) && \ + ((a)->s6_addr32[2] == htonl(0x0000ffff))) +#endif + #ifdef WATT32 #define sock_write SockWrite /* Watt-32 uses same names */ #define sock_read SockRead @@ -88,6 +105,18 @@ static int dgram_new(BIO *h); static int dgram_free(BIO *data); static int dgram_clear(BIO *bio); +#ifndef OPENSSL_NO_SCTP +static int dgram_sctp_write(BIO *h, const char *buf, int num); +static int dgram_sctp_read(BIO *h, char *buf, int size); +static int dgram_sctp_puts(BIO *h, const char *str); +static long dgram_sctp_ctrl(BIO *h, int cmd, long arg1, void *arg2); +static int dgram_sctp_new(BIO *h); +static int dgram_sctp_free(BIO *data); +#ifdef SCTP_AUTHENTICATION_EVENT +static void dgram_sctp_handle_auth_free_key_event(BIO *b, union sctp_notification *snp); +#endif +#endif + static int BIO_dgram_should_retry(int s); static void get_current_time(struct timeval *t); @@ -106,6 +135,22 @@ static BIO_METHOD methods_dgramp= NULL, }; +#ifndef OPENSSL_NO_SCTP +static BIO_METHOD methods_dgramp_sctp= + { + BIO_TYPE_DGRAM_SCTP, + "datagram sctp socket", + dgram_sctp_write, + dgram_sctp_read, + dgram_sctp_puts, + NULL, /* dgram_gets, */ + dgram_sctp_ctrl, + dgram_sctp_new, + dgram_sctp_free, + NULL, + }; +#endif + typedef struct bio_dgram_data_st { union { @@ -122,6 +167,40 @@ typedef struct bio_dgram_data_st struct timeval socket_timeout; } bio_dgram_data; +#ifndef OPENSSL_NO_SCTP +typedef struct bio_dgram_sctp_save_message_st + { + BIO *bio; + char *data; + int length; + } bio_dgram_sctp_save_message; + +typedef struct bio_dgram_sctp_data_st + { + union { + struct sockaddr sa; + struct sockaddr_in sa_in; +#if OPENSSL_USE_IPV6 + struct sockaddr_in6 sa_in6; +#endif + } peer; + unsigned int connected; + unsigned int _errno; + unsigned int mtu; + struct bio_dgram_sctp_sndinfo sndinfo; + struct bio_dgram_sctp_rcvinfo rcvinfo; + struct bio_dgram_sctp_prinfo prinfo; + void (*handle_notifications)(BIO *bio, void *context, void *buf); + void* notification_context; + int in_handshake; + int ccs_rcvd; + int ccs_sent; + int save_shutdown; + int peer_auth_tested; + bio_dgram_sctp_save_message saved_message; + } bio_dgram_sctp_data; +#endif + BIO_METHOD *BIO_s_datagram(void) { return(&methods_dgramp); @@ -186,7 +265,7 @@ static void dgram_adjust_rcv_timeout(BIO *b) { #if defined(SO_RCVTIMEO) bio_dgram_data *data = (bio_dgram_data *)b->ptr; - int sz = sizeof(int); + union { size_t s; int i; } sz = {0}; /* Is a timer active? */ if (data->next_timeout.tv_sec > 0 || data->next_timeout.tv_usec > 0) @@ -196,8 +275,10 @@ static void dgram_adjust_rcv_timeout(BIO *b) /* Read current socket timeout */ #ifdef OPENSSL_SYS_WINDOWS int timeout; + + sz.i = sizeof(timeout); if (getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, - (void*)&timeout, &sz) < 0) + (void*)&timeout, &sz.i) < 0) { perror("getsockopt"); } else { @@ -205,9 +286,12 @@ static void dgram_adjust_rcv_timeout(BIO *b) data->socket_timeout.tv_usec = (timeout % 1000) * 1000; } #else + sz.i = sizeof(data->socket_timeout); if ( getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, &(data->socket_timeout), (void *)&sz) < 0) { perror("getsockopt"); } + else if (sizeof(sz.s)!=sizeof(sz.i) && sz.i==0) + OPENSSL_assert(sz.s<=sizeof(data->socket_timeout)); #endif /* Get current time */ @@ -376,11 +460,10 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) int *ip; struct sockaddr *to = NULL; bio_dgram_data *data = NULL; -#if defined(IP_MTU_DISCOVER) || defined(IP_MTU) - long sockopt_val = 0; - unsigned int sockopt_len = 0; -#endif -#ifdef OPENSSL_SYS_LINUX +#if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU)) + int sockopt_val = 0; + socklen_t sockopt_len; /* assume that system supporting IP_MTU is + * modern enough to define socklen_t */ socklen_t addr_len; union { struct sockaddr sa; @@ -462,7 +545,7 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) break; /* (Linux)kernel sets DF bit on outgoing IP packets */ case BIO_CTRL_DGRAM_MTU_DISCOVER: -#ifdef OPENSSL_SYS_LINUX +#if defined(OPENSSL_SYS_LINUX) && defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO) addr_len = (socklen_t)sizeof(addr); memset((void *)&addr, 0, sizeof(addr)); if (getsockname(b->num, &addr.sa, &addr_len) < 0) @@ -470,7 +553,6 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; break; } - sockopt_len = sizeof(sockopt_val); switch (addr.sa.sa_family) { case AF_INET: @@ -479,7 +561,7 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) &sockopt_val, sizeof(sockopt_val))) < 0) perror("setsockopt"); break; -#if OPENSSL_USE_IPV6 && defined(IPV6_MTU_DISCOVER) +#if OPENSSL_USE_IPV6 && defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO) case AF_INET6: sockopt_val = IPV6_PMTUDISC_DO; if ((ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_MTU_DISCOVER, @@ -496,7 +578,7 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) break; #endif case BIO_CTRL_DGRAM_QUERY_MTU: -#ifdef OPENSSL_SYS_LINUX +#if defined(OPENSSL_SYS_LINUX) && defined(IP_MTU) addr_len = (socklen_t)sizeof(addr); memset((void *)&addr, 0, sizeof(addr)); if (getsockname(b->num, &addr.sa, &addr_len) < 0) @@ -547,6 +629,27 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; #endif break; + case BIO_CTRL_DGRAM_GET_FALLBACK_MTU: + switch (data->peer.sa.sa_family) + { + case AF_INET: + ret = 576 - 20 - 8; + break; +#if OPENSSL_USE_IPV6 + case AF_INET6: +#ifdef IN6_IS_ADDR_V4MAPPED + if (IN6_IS_ADDR_V4MAPPED(&data->peer.sa_in6.sin6_addr)) + ret = 576 - 20 - 8; + else +#endif + ret = 1280 - 40 - 8; + break; +#endif + default: + ret = 576 - 20 - 8; + break; + } + break; case BIO_CTRL_DGRAM_GET_MTU: return data->mtu; break; @@ -637,12 +740,15 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) #endif break; case BIO_CTRL_DGRAM_GET_RECV_TIMEOUT: -#ifdef OPENSSL_SYS_WINDOWS { - int timeout, sz = sizeof(timeout); + union { size_t s; int i; } sz = {0}; +#ifdef OPENSSL_SYS_WINDOWS + int timeout; struct timeval *tv = (struct timeval *)ptr; + + sz.i = sizeof(timeout); if (getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, - (void*)&timeout, &sz) < 0) + (void*)&timeout, &sz.i) < 0) { perror("getsockopt"); ret = -1; } else { @@ -650,12 +756,20 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) tv->tv_usec = (timeout % 1000) * 1000; ret = sizeof(*tv); } - } #else + sz.i = sizeof(struct timeval); if ( getsockopt(b->num, SOL_SOCKET, SO_RCVTIMEO, - ptr, (void *)&ret) < 0) + ptr, (void *)&sz) < 0) { perror("getsockopt"); ret = -1; } + else if (sizeof(sz.s)!=sizeof(sz.i) && sz.i==0) + { + OPENSSL_assert(sz.s<=sizeof(struct timeval)); + ret = (int)sz.s; + } + else + ret = sz.i; #endif + } break; #endif #if defined(SO_SNDTIMEO) @@ -675,12 +789,15 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) #endif break; case BIO_CTRL_DGRAM_GET_SEND_TIMEOUT: -#ifdef OPENSSL_SYS_WINDOWS { - int timeout, sz = sizeof(timeout); + union { size_t s; int i; } sz = {0}; +#ifdef OPENSSL_SYS_WINDOWS + int timeout; struct timeval *tv = (struct timeval *)ptr; + + sz.i = sizeof(timeout); if (getsockopt(b->num, SOL_SOCKET, SO_SNDTIMEO, - (void*)&timeout, &sz) < 0) + (void*)&timeout, &sz.i) < 0) { perror("getsockopt"); ret = -1; } else { @@ -688,12 +805,20 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) tv->tv_usec = (timeout % 1000) * 1000; ret = sizeof(*tv); } - } #else + sz.i = sizeof(struct timeval); if ( getsockopt(b->num, SOL_SOCKET, SO_SNDTIMEO, - ptr, (void *)&ret) < 0) + ptr, (void *)&sz) < 0) { perror("getsockopt"); ret = -1; } + else if (sizeof(sz.s)!=sizeof(sz.i) && sz.i==0) + { + OPENSSL_assert(sz.s<=sizeof(struct timeval)); + ret = (int)sz.s; + } + else + ret = sz.i; #endif + } break; #endif case BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP: @@ -738,6 +863,910 @@ static int dgram_puts(BIO *bp, const char *str) return(ret); } +#ifndef OPENSSL_NO_SCTP +BIO_METHOD *BIO_s_datagram_sctp(void) + { + return(&methods_dgramp_sctp); + } + +BIO *BIO_new_dgram_sctp(int fd, int close_flag) + { + BIO *bio; + int ret, optval = 20000; + int auth_data = 0, auth_forward = 0; + unsigned char *p; + struct sctp_authchunk auth; + struct sctp_authchunks *authchunks; + socklen_t sockopt_len; +#ifdef SCTP_AUTHENTICATION_EVENT +#ifdef SCTP_EVENT + struct sctp_event event; +#else + struct sctp_event_subscribe event; +#endif +#endif + + bio=BIO_new(BIO_s_datagram_sctp()); + if (bio == NULL) return(NULL); + BIO_set_fd(bio,fd,close_flag); + + /* Activate SCTP-AUTH for DATA and FORWARD-TSN chunks */ + auth.sauth_chunk = OPENSSL_SCTP_DATA_CHUNK_TYPE; + ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk)); + OPENSSL_assert(ret >= 0); + auth.sauth_chunk = OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE; + ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk)); + OPENSSL_assert(ret >= 0); + + /* Test if activation was successful. When using accept(), + * SCTP-AUTH has to be activated for the listening socket + * already, otherwise the connected socket won't use it. */ + sockopt_len = (socklen_t)(sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t)); + authchunks = OPENSSL_malloc(sockopt_len); + memset(authchunks, 0, sizeof(sockopt_len)); + ret = getsockopt(fd, IPPROTO_SCTP, SCTP_LOCAL_AUTH_CHUNKS, authchunks, &sockopt_len); + OPENSSL_assert(ret >= 0); + + for (p = (unsigned char*) authchunks->gauth_chunks; + p < (unsigned char*) authchunks + sockopt_len; + p += sizeof(uint8_t)) + { + if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1; + if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1; + } + + OPENSSL_free(authchunks); + + OPENSSL_assert(auth_data); + OPENSSL_assert(auth_forward); + +#ifdef SCTP_AUTHENTICATION_EVENT +#ifdef SCTP_EVENT + memset(&event, 0, sizeof(struct sctp_event)); + event.se_assoc_id = 0; + event.se_type = SCTP_AUTHENTICATION_EVENT; + event.se_on = 1; + ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); + OPENSSL_assert(ret >= 0); +#else + sockopt_len = (socklen_t) sizeof(struct sctp_event_subscribe); + ret = getsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, &sockopt_len); + OPENSSL_assert(ret >= 0); + + event.sctp_authentication_event = 1; + + ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); + OPENSSL_assert(ret >= 0); +#endif +#endif + + /* Disable partial delivery by setting the min size + * larger than the max record size of 2^14 + 2048 + 13 + */ + ret = setsockopt(fd, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT, &optval, sizeof(optval)); + OPENSSL_assert(ret >= 0); + + return(bio); + } + +int BIO_dgram_is_sctp(BIO *bio) + { + return (BIO_method_type(bio) == BIO_TYPE_DGRAM_SCTP); + } + +static int dgram_sctp_new(BIO *bi) + { + bio_dgram_sctp_data *data = NULL; + + bi->init=0; + bi->num=0; + data = OPENSSL_malloc(sizeof(bio_dgram_sctp_data)); + if (data == NULL) + return 0; + memset(data, 0x00, sizeof(bio_dgram_sctp_data)); +#ifdef SCTP_PR_SCTP_NONE + data->prinfo.pr_policy = SCTP_PR_SCTP_NONE; +#endif + bi->ptr = data; + + bi->flags=0; + return(1); + } + +static int dgram_sctp_free(BIO *a) + { + bio_dgram_sctp_data *data; + + if (a == NULL) return(0); + if ( ! dgram_clear(a)) + return 0; + + data = (bio_dgram_sctp_data *)a->ptr; + if(data != NULL) OPENSSL_free(data); + + return(1); + } + +#ifdef SCTP_AUTHENTICATION_EVENT +void dgram_sctp_handle_auth_free_key_event(BIO *b, union sctp_notification *snp) + { + int ret; + struct sctp_authkey_event* authkeyevent = &snp->sn_auth_event; + + if (authkeyevent->auth_indication == SCTP_AUTH_FREE_KEY) + { + struct sctp_authkeyid authkeyid; + + /* delete key */ + authkeyid.scact_keynumber = authkeyevent->auth_keynumber; + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DELETE_KEY, + &authkeyid, sizeof(struct sctp_authkeyid)); + } + } +#endif + +static int dgram_sctp_read(BIO *b, char *out, int outl) + { + int ret = 0, n = 0, i, optval; + socklen_t optlen; + bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr; + union sctp_notification *snp; + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsg; + char cmsgbuf[512]; + + if (out != NULL) + { + clear_socket_error(); + + do + { + memset(&data->rcvinfo, 0x00, sizeof(struct bio_dgram_sctp_rcvinfo)); + iov.iov_base = out; + iov.iov_len = outl; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = 512; + msg.msg_flags = 0; + n = recvmsg(b->num, &msg, 0); + + if (msg.msg_controllen > 0) + { + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level != IPPROTO_SCTP) + continue; +#ifdef SCTP_RCVINFO + if (cmsg->cmsg_type == SCTP_RCVINFO) + { + struct sctp_rcvinfo *rcvinfo; + + rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmsg); + data->rcvinfo.rcv_sid = rcvinfo->rcv_sid; + data->rcvinfo.rcv_ssn = rcvinfo->rcv_ssn; + data->rcvinfo.rcv_flags = rcvinfo->rcv_flags; + data->rcvinfo.rcv_ppid = rcvinfo->rcv_ppid; + data->rcvinfo.rcv_tsn = rcvinfo->rcv_tsn; + data->rcvinfo.rcv_cumtsn = rcvinfo->rcv_cumtsn; + data->rcvinfo.rcv_context = rcvinfo->rcv_context; + } +#endif +#ifdef SCTP_SNDRCV + if (cmsg->cmsg_type == SCTP_SNDRCV) + { + struct sctp_sndrcvinfo *sndrcvinfo; + + sndrcvinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + data->rcvinfo.rcv_sid = sndrcvinfo->sinfo_stream; + data->rcvinfo.rcv_ssn = sndrcvinfo->sinfo_ssn; + data->rcvinfo.rcv_flags = sndrcvinfo->sinfo_flags; + data->rcvinfo.rcv_ppid = sndrcvinfo->sinfo_ppid; + data->rcvinfo.rcv_tsn = sndrcvinfo->sinfo_tsn; + data->rcvinfo.rcv_cumtsn = sndrcvinfo->sinfo_cumtsn; + data->rcvinfo.rcv_context = sndrcvinfo->sinfo_context; + } +#endif + } + } + + if (n <= 0) + { + if (n < 0) + ret = n; + break; + } + + if (msg.msg_flags & MSG_NOTIFICATION) + { + snp = (union sctp_notification*) out; + if (snp->sn_header.sn_type == SCTP_SENDER_DRY_EVENT) + { +#ifdef SCTP_EVENT + struct sctp_event event; +#else + struct sctp_event_subscribe event; + socklen_t eventsize; +#endif + /* If a message has been delayed until the socket + * is dry, it can be sent now. + */ + if (data->saved_message.length > 0) + { + dgram_sctp_write(data->saved_message.bio, data->saved_message.data, + data->saved_message.length); + OPENSSL_free(data->saved_message.data); + data->saved_message.length = 0; + } + + /* disable sender dry event */ +#ifdef SCTP_EVENT + memset(&event, 0, sizeof(struct sctp_event)); + event.se_assoc_id = 0; + event.se_type = SCTP_SENDER_DRY_EVENT; + event.se_on = 0; + i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); + OPENSSL_assert(i >= 0); +#else + eventsize = sizeof(struct sctp_event_subscribe); + i = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize); + OPENSSL_assert(i >= 0); + + event.sctp_sender_dry_event = 0; + + i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); + OPENSSL_assert(i >= 0); +#endif + } + +#ifdef SCTP_AUTHENTICATION_EVENT + if (snp->sn_header.sn_type == SCTP_AUTHENTICATION_EVENT) + dgram_sctp_handle_auth_free_key_event(b, snp); +#endif + + if (data->handle_notifications != NULL) + data->handle_notifications(b, data->notification_context, (void*) out); + + memset(out, 0, outl); + } + else + ret += n; + } + while ((msg.msg_flags & MSG_NOTIFICATION) && (msg.msg_flags & MSG_EOR) && (ret < outl)); + + if (ret > 0 && !(msg.msg_flags & MSG_EOR)) + { + /* Partial message read, this should never happen! */ + + /* The buffer was too small, this means the peer sent + * a message that was larger than allowed. */ + if (ret == outl) + return -1; + + /* Test if socket buffer can handle max record + * size (2^14 + 2048 + 13) + */ + optlen = (socklen_t) sizeof(int); + ret = getsockopt(b->num, SOL_SOCKET, SO_RCVBUF, &optval, &optlen); + OPENSSL_assert(ret >= 0); + OPENSSL_assert(optval >= 18445); + + /* Test if SCTP doesn't partially deliver below + * max record size (2^14 + 2048 + 13) + */ + optlen = (socklen_t) sizeof(int); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT, + &optval, &optlen); + OPENSSL_assert(ret >= 0); + OPENSSL_assert(optval >= 18445); + + /* Partially delivered notification??? Probably a bug.... */ + OPENSSL_assert(!(msg.msg_flags & MSG_NOTIFICATION)); + + /* Everything seems ok till now, so it's most likely + * a message dropped by PR-SCTP. + */ + memset(out, 0, outl); + BIO_set_retry_read(b); + return -1; + } + + BIO_clear_retry_flags(b); + if (ret < 0) + { + if (BIO_dgram_should_retry(ret)) + { + BIO_set_retry_read(b); + data->_errno = get_last_socket_error(); + } + } + + /* Test if peer uses SCTP-AUTH before continuing */ + if (!data->peer_auth_tested) + { + int ii, auth_data = 0, auth_forward = 0; + unsigned char *p; + struct sctp_authchunks *authchunks; + + optlen = (socklen_t)(sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t)); + authchunks = OPENSSL_malloc(optlen); + memset(authchunks, 0, sizeof(optlen)); + ii = getsockopt(b->num, IPPROTO_SCTP, SCTP_PEER_AUTH_CHUNKS, authchunks, &optlen); + OPENSSL_assert(ii >= 0); + + for (p = (unsigned char*) authchunks->gauth_chunks; + p < (unsigned char*) authchunks + optlen; + p += sizeof(uint8_t)) + { + if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1; + if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1; + } + + OPENSSL_free(authchunks); + + if (!auth_data || !auth_forward) + { + BIOerr(BIO_F_DGRAM_SCTP_READ,BIO_R_CONNECT_ERROR); + return -1; + } + + data->peer_auth_tested = 1; + } + } + return(ret); + } + +static int dgram_sctp_write(BIO *b, const char *in, int inl) + { + int ret; + bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr; + struct bio_dgram_sctp_sndinfo *sinfo = &(data->sndinfo); + struct bio_dgram_sctp_prinfo *pinfo = &(data->prinfo); + struct bio_dgram_sctp_sndinfo handshake_sinfo; + struct iovec iov[1]; + struct msghdr msg; + struct cmsghdr *cmsg; +#if defined(SCTP_SNDINFO) && defined(SCTP_PRINFO) + char cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo)) + CMSG_SPACE(sizeof(struct sctp_prinfo))]; + struct sctp_sndinfo *sndinfo; + struct sctp_prinfo *prinfo; +#else + char cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; + struct sctp_sndrcvinfo *sndrcvinfo; +#endif + + clear_socket_error(); + + /* If we're send anything else than application data, + * disable all user parameters and flags. + */ + if (in[0] != 23) { + memset(&handshake_sinfo, 0x00, sizeof(struct bio_dgram_sctp_sndinfo)); +#ifdef SCTP_SACK_IMMEDIATELY + handshake_sinfo.snd_flags = SCTP_SACK_IMMEDIATELY; +#endif + sinfo = &handshake_sinfo; + } + + /* If we have to send a shutdown alert message and the + * socket is not dry yet, we have to save it and send it + * as soon as the socket gets dry. + */ + if (data->save_shutdown && !BIO_dgram_sctp_wait_for_dry(b)) + { + data->saved_message.bio = b; + data->saved_message.length = inl; + data->saved_message.data = OPENSSL_malloc(inl); + memcpy(data->saved_message.data, in, inl); + return inl; + } + + iov[0].iov_base = (char *)in; + iov[0].iov_len = inl; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = (caddr_t)cmsgbuf; + msg.msg_controllen = 0; + msg.msg_flags = 0; +#if defined(SCTP_SNDINFO) && defined(SCTP_PRINFO) + cmsg = (struct cmsghdr *)cmsgbuf; + cmsg->cmsg_level = IPPROTO_SCTP; + cmsg->cmsg_type = SCTP_SNDINFO; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndinfo)); + sndinfo = (struct sctp_sndinfo *)CMSG_DATA(cmsg); + memset(sndinfo, 0, sizeof(struct sctp_sndinfo)); + sndinfo->snd_sid = sinfo->snd_sid; + sndinfo->snd_flags = sinfo->snd_flags; + sndinfo->snd_ppid = sinfo->snd_ppid; + sndinfo->snd_context = sinfo->snd_context; + msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_sndinfo)); + + cmsg = (struct cmsghdr *)&cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo))]; + cmsg->cmsg_level = IPPROTO_SCTP; + cmsg->cmsg_type = SCTP_PRINFO; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_prinfo)); + prinfo = (struct sctp_prinfo *)CMSG_DATA(cmsg); + memset(prinfo, 0, sizeof(struct sctp_prinfo)); + prinfo->pr_policy = pinfo->pr_policy; + prinfo->pr_value = pinfo->pr_value; + msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_prinfo)); +#else + cmsg = (struct cmsghdr *)cmsgbuf; + cmsg->cmsg_level = IPPROTO_SCTP; + cmsg->cmsg_type = SCTP_SNDRCV; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + sndrcvinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo)); + sndrcvinfo->sinfo_stream = sinfo->snd_sid; + sndrcvinfo->sinfo_flags = sinfo->snd_flags; +#ifdef __FreeBSD__ + sndrcvinfo->sinfo_flags |= pinfo->pr_policy; +#endif + sndrcvinfo->sinfo_ppid = sinfo->snd_ppid; + sndrcvinfo->sinfo_context = sinfo->snd_context; + sndrcvinfo->sinfo_timetolive = pinfo->pr_value; + msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); +#endif + + ret = sendmsg(b->num, &msg, 0); + + BIO_clear_retry_flags(b); + if (ret <= 0) + { + if (BIO_dgram_should_retry(ret)) + { + BIO_set_retry_write(b); + data->_errno = get_last_socket_error(); + } + } + return(ret); + } + +static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) + { + long ret=1; + bio_dgram_sctp_data *data = NULL; + socklen_t sockopt_len = 0; + struct sctp_authkeyid authkeyid; + struct sctp_authkey *authkey; + + data = (bio_dgram_sctp_data *)b->ptr; + + switch (cmd) + { + case BIO_CTRL_DGRAM_QUERY_MTU: + /* Set to maximum (2^14) + * and ignore user input to enable transport + * protocol fragmentation. + * Returns always 2^14. + */ + data->mtu = 16384; + ret = data->mtu; + break; + case BIO_CTRL_DGRAM_SET_MTU: + /* Set to maximum (2^14) + * and ignore input to enable transport + * protocol fragmentation. + * Returns always 2^14. + */ + data->mtu = 16384; + ret = data->mtu; + break; + case BIO_CTRL_DGRAM_SET_CONNECTED: + case BIO_CTRL_DGRAM_CONNECT: + /* Returns always -1. */ + ret = -1; + break; + case BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT: + /* SCTP doesn't need the DTLS timer + * Returns always 1. + */ + break; + case BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE: + if (num > 0) + data->in_handshake = 1; + else + data->in_handshake = 0; + + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_NODELAY, &data->in_handshake, sizeof(int)); + break; + case BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY: + /* New shared key for SCTP AUTH. + * Returns 0 on success, -1 otherwise. + */ + + /* Get active key */ + sockopt_len = sizeof(struct sctp_authkeyid); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len); + if (ret < 0) break; + + /* Add new key */ + sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t); + authkey = OPENSSL_malloc(sockopt_len); + memset(authkey, 0x00, sockopt_len); + authkey->sca_keynumber = authkeyid.scact_keynumber + 1; +#ifndef __FreeBSD__ + /* This field is missing in FreeBSD 8.2 and earlier, + * and FreeBSD 8.3 and higher work without it. + */ + authkey->sca_keylength = 64; +#endif + memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t)); + + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len); + if (ret < 0) break; + + /* Reset active key */ + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, + &authkeyid, sizeof(struct sctp_authkeyid)); + if (ret < 0) break; + + break; + case BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY: + /* Returns 0 on success, -1 otherwise. */ + + /* Get active key */ + sockopt_len = sizeof(struct sctp_authkeyid); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len); + if (ret < 0) break; + + /* Set active key */ + authkeyid.scact_keynumber = authkeyid.scact_keynumber + 1; + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, + &authkeyid, sizeof(struct sctp_authkeyid)); + if (ret < 0) break; + + /* CCS has been sent, so remember that and fall through + * to check if we need to deactivate an old key + */ + data->ccs_sent = 1; + + case BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD: + /* Returns 0 on success, -1 otherwise. */ + + /* Has this command really been called or is this just a fall-through? */ + if (cmd == BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD) + data->ccs_rcvd = 1; + + /* CSS has been both, received and sent, so deactivate an old key */ + if (data->ccs_rcvd == 1 && data->ccs_sent == 1) + { + /* Get active key */ + sockopt_len = sizeof(struct sctp_authkeyid); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len); + if (ret < 0) break; + + /* Deactivate key or delete second last key if + * SCTP_AUTHENTICATION_EVENT is not available. + */ + authkeyid.scact_keynumber = authkeyid.scact_keynumber - 1; +#ifdef SCTP_AUTH_DEACTIVATE_KEY + sockopt_len = sizeof(struct sctp_authkeyid); + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DEACTIVATE_KEY, + &authkeyid, sockopt_len); + if (ret < 0) break; +#endif +#ifndef SCTP_AUTHENTICATION_EVENT + if (authkeyid.scact_keynumber > 0) + { + authkeyid.scact_keynumber = authkeyid.scact_keynumber - 1; + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DELETE_KEY, + &authkeyid, sizeof(struct sctp_authkeyid)); + if (ret < 0) break; + } +#endif + + data->ccs_rcvd = 0; + data->ccs_sent = 0; + } + break; + case BIO_CTRL_DGRAM_SCTP_GET_SNDINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_sndinfo)) + num = sizeof(struct bio_dgram_sctp_sndinfo); + + memcpy(ptr, &(data->sndinfo), num); + ret = num; + break; + case BIO_CTRL_DGRAM_SCTP_SET_SNDINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_sndinfo)) + num = sizeof(struct bio_dgram_sctp_sndinfo); + + memcpy(&(data->sndinfo), ptr, num); + break; + case BIO_CTRL_DGRAM_SCTP_GET_RCVINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_rcvinfo)) + num = sizeof(struct bio_dgram_sctp_rcvinfo); + + memcpy(ptr, &data->rcvinfo, num); + + ret = num; + break; + case BIO_CTRL_DGRAM_SCTP_SET_RCVINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_rcvinfo)) + num = sizeof(struct bio_dgram_sctp_rcvinfo); + + memcpy(&(data->rcvinfo), ptr, num); + break; + case BIO_CTRL_DGRAM_SCTP_GET_PRINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_prinfo)) + num = sizeof(struct bio_dgram_sctp_prinfo); + + memcpy(ptr, &(data->prinfo), num); + ret = num; + break; + case BIO_CTRL_DGRAM_SCTP_SET_PRINFO: + /* Returns the size of the copied struct. */ + if (num > (long) sizeof(struct bio_dgram_sctp_prinfo)) + num = sizeof(struct bio_dgram_sctp_prinfo); + + memcpy(&(data->prinfo), ptr, num); + break; + case BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN: + /* Returns always 1. */ + if (num > 0) + data->save_shutdown = 1; + else + data->save_shutdown = 0; + break; + + default: + /* Pass to default ctrl function to + * process SCTP unspecific commands + */ + ret=dgram_ctrl(b, cmd, num, ptr); + break; + } + return(ret); + } + +int BIO_dgram_sctp_notification_cb(BIO *b, + void (*handle_notifications)(BIO *bio, void *context, void *buf), + void *context) + { + bio_dgram_sctp_data *data = (bio_dgram_sctp_data *) b->ptr; + + if (handle_notifications != NULL) + { + data->handle_notifications = handle_notifications; + data->notification_context = context; + } + else + return -1; + + return 0; + } + +int BIO_dgram_sctp_wait_for_dry(BIO *b) +{ + int is_dry = 0; + int n, sockflags, ret; + union sctp_notification snp; + struct msghdr msg; + struct iovec iov; +#ifdef SCTP_EVENT + struct sctp_event event; +#else + struct sctp_event_subscribe event; + socklen_t eventsize; +#endif + bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr; + + /* set sender dry event */ +#ifdef SCTP_EVENT + memset(&event, 0, sizeof(struct sctp_event)); + event.se_assoc_id = 0; + event.se_type = SCTP_SENDER_DRY_EVENT; + event.se_on = 1; + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); +#else + eventsize = sizeof(struct sctp_event_subscribe); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize); + if (ret < 0) + return -1; + + event.sctp_sender_dry_event = 1; + + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); +#endif + if (ret < 0) + return -1; + + /* peek for notification */ + memset(&snp, 0x00, sizeof(union sctp_notification)); + iov.iov_base = (char *)&snp; + iov.iov_len = sizeof(union sctp_notification); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + n = recvmsg(b->num, &msg, MSG_PEEK); + if (n <= 0) + { + if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK)) + return -1; + else + return 0; + } + + /* if we find a notification, process it and try again if necessary */ + while (msg.msg_flags & MSG_NOTIFICATION) + { + memset(&snp, 0x00, sizeof(union sctp_notification)); + iov.iov_base = (char *)&snp; + iov.iov_len = sizeof(union sctp_notification); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + n = recvmsg(b->num, &msg, 0); + if (n <= 0) + { + if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK)) + return -1; + else + return is_dry; + } + + if (snp.sn_header.sn_type == SCTP_SENDER_DRY_EVENT) + { + is_dry = 1; + + /* disable sender dry event */ +#ifdef SCTP_EVENT + memset(&event, 0, sizeof(struct sctp_event)); + event.se_assoc_id = 0; + event.se_type = SCTP_SENDER_DRY_EVENT; + event.se_on = 0; + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); +#else + eventsize = (socklen_t) sizeof(struct sctp_event_subscribe); + ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize); + if (ret < 0) + return -1; + + event.sctp_sender_dry_event = 0; + + ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); +#endif + if (ret < 0) + return -1; + } + +#ifdef SCTP_AUTHENTICATION_EVENT + if (snp.sn_header.sn_type == SCTP_AUTHENTICATION_EVENT) + dgram_sctp_handle_auth_free_key_event(b, &snp); +#endif + + if (data->handle_notifications != NULL) + data->handle_notifications(b, data->notification_context, (void*) &snp); + + /* found notification, peek again */ + memset(&snp, 0x00, sizeof(union sctp_notification)); + iov.iov_base = (char *)&snp; + iov.iov_len = sizeof(union sctp_notification); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + /* if we have seen the dry already, don't wait */ + if (is_dry) + { + sockflags = fcntl(b->num, F_GETFL, 0); + fcntl(b->num, F_SETFL, O_NONBLOCK); + } + + n = recvmsg(b->num, &msg, MSG_PEEK); + + if (is_dry) + { + fcntl(b->num, F_SETFL, sockflags); + } + + if (n <= 0) + { + if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK)) + return -1; + else + return is_dry; + } + } + + /* read anything else */ + return is_dry; +} + +int BIO_dgram_sctp_msg_waiting(BIO *b) + { + int n, sockflags; + union sctp_notification snp; + struct msghdr msg; + struct iovec iov; + bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr; + + /* Check if there are any messages waiting to be read */ + do + { + memset(&snp, 0x00, sizeof(union sctp_notification)); + iov.iov_base = (char *)&snp; + iov.iov_len = sizeof(union sctp_notification); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + sockflags = fcntl(b->num, F_GETFL, 0); + fcntl(b->num, F_SETFL, O_NONBLOCK); + n = recvmsg(b->num, &msg, MSG_PEEK); + fcntl(b->num, F_SETFL, sockflags); + + /* if notification, process and try again */ + if (n > 0 && (msg.msg_flags & MSG_NOTIFICATION)) + { +#ifdef SCTP_AUTHENTICATION_EVENT + if (snp.sn_header.sn_type == SCTP_AUTHENTICATION_EVENT) + dgram_sctp_handle_auth_free_key_event(b, &snp); +#endif + + memset(&snp, 0x00, sizeof(union sctp_notification)); + iov.iov_base = (char *)&snp; + iov.iov_len = sizeof(union sctp_notification); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + n = recvmsg(b->num, &msg, 0); + + if (data->handle_notifications != NULL) + data->handle_notifications(b, data->notification_context, (void*) &snp); + } + + } while (n > 0 && (msg.msg_flags & MSG_NOTIFICATION)); + + /* Return 1 if there is a message to be read, return 0 otherwise. */ + if (n > 0) + return 1; + else + return 0; + } + +static int dgram_sctp_puts(BIO *bp, const char *str) + { + int n,ret; + + n=strlen(str); + ret=dgram_sctp_write(bp,str,n); + return(ret); + } +#endif + static int BIO_dgram_should_retry(int i) { int err; diff --git a/app/openssl/crypto/bio/bss_log.c b/app/openssl/crypto/bio/bss_log.c index b7dce5c1..2227b2b5 100644 --- a/app/openssl/crypto/bio/bss_log.c +++ b/app/openssl/crypto/bio/bss_log.c @@ -245,7 +245,7 @@ static int MS_CALLBACK slg_puts(BIO *bp, const char *str) static void xopenlog(BIO* bp, char* name, int level) { - if (GetVersion() < 0x80000000) + if (check_winnt()) bp->ptr = RegisterEventSourceA(NULL,name); else bp->ptr = NULL; diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.S b/app/openssl/crypto/bn/asm/armv4-gf2m.S new file mode 100644 index 00000000..038f0864 --- /dev/null +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.S @@ -0,0 +1,213 @@ +#include "arm_arch.h" + +.text +.code 32 + +#if __ARM_ARCH__>=7 +.fpu neon + +.type mul_1x1_neon,%function +.align 5 +mul_1x1_neon: + vshl.u64 d2,d16,#8 @ q1-q3 are slided + vmull.p8 q0,d16,d17 @ a·bb + vshl.u64 d4,d16,#16 + vmull.p8 q1,d2,d17 @ a<<8·bb + vshl.u64 d6,d16,#24 + vmull.p8 q2,d4,d17 @ a<<16·bb + vshr.u64 d2,#8 + vmull.p8 q3,d6,d17 @ a<<24·bb + vshl.u64 d3,#24 + veor d0,d2 + vshr.u64 d4,#16 + veor d0,d3 + vshl.u64 d5,#16 + veor d0,d4 + vshr.u64 d6,#24 + veor d0,d5 + vshl.u64 d7,#8 + veor d0,d6 + veor d0,d7 + .word 0xe12fff1e +.size mul_1x1_neon,.-mul_1x1_neon +#endif +.type mul_1x1_ialu,%function +.align 5 +mul_1x1_ialu: + mov r4,#0 + bic r5,r1,#3<<30 @ a1=a&0x3fffffff + str r4,[sp,#0] @ tab[0]=0 + add r6,r5,r5 @ a2=a1<<1 + str r5,[sp,#4] @ tab[1]=a1 + eor r7,r5,r6 @ a1^a2 + str r6,[sp,#8] @ tab[2]=a2 + mov r8,r5,lsl#2 @ a4=a1<<2 + str r7,[sp,#12] @ tab[3]=a1^a2 + eor r9,r5,r8 @ a1^a4 + str r8,[sp,#16] @ tab[4]=a4 + eor r4,r6,r8 @ a2^a4 + str r9,[sp,#20] @ tab[5]=a1^a4 + eor r7,r7,r8 @ a1^a2^a4 + str r4,[sp,#24] @ tab[6]=a2^a4 + and r8,r12,r0,lsl#2 + str r7,[sp,#28] @ tab[7]=a1^a2^a4 + + and r9,r12,r0,lsr#1 + ldr r5,[sp,r8] @ tab[b & 0x7] + and r8,r12,r0,lsr#4 + ldr r7,[sp,r9] @ tab[b >> 3 & 0x7] + and r9,r12,r0,lsr#7 + ldr r6,[sp,r8] @ tab[b >> 6 & 0x7] + eor r5,r5,r7,lsl#3 @ stall + mov r4,r7,lsr#29 + ldr r7,[sp,r9] @ tab[b >> 9 & 0x7] + + and r8,r12,r0,lsr#10 + eor r5,r5,r6,lsl#6 + eor r4,r4,r6,lsr#26 + ldr r6,[sp,r8] @ tab[b >> 12 & 0x7] + + and r9,r12,r0,lsr#13 + eor r5,r5,r7,lsl#9 + eor r4,r4,r7,lsr#23 + ldr r7,[sp,r9] @ tab[b >> 15 & 0x7] + + and r8,r12,r0,lsr#16 + eor r5,r5,r6,lsl#12 + eor r4,r4,r6,lsr#20 + ldr r6,[sp,r8] @ tab[b >> 18 & 0x7] + + and r9,r12,r0,lsr#19 + eor r5,r5,r7,lsl#15 + eor r4,r4,r7,lsr#17 + ldr r7,[sp,r9] @ tab[b >> 21 & 0x7] + + and r8,r12,r0,lsr#22 + eor r5,r5,r6,lsl#18 + eor r4,r4,r6,lsr#14 + ldr r6,[sp,r8] @ tab[b >> 24 & 0x7] + + and r9,r12,r0,lsr#25 + eor r5,r5,r7,lsl#21 + eor r4,r4,r7,lsr#11 + ldr r7,[sp,r9] @ tab[b >> 27 & 0x7] + + tst r1,#1<<30 + and r8,r12,r0,lsr#28 + eor r5,r5,r6,lsl#24 + eor r4,r4,r6,lsr#8 + ldr r6,[sp,r8] @ tab[b >> 30 ] + + eorne r5,r5,r0,lsl#30 + eorne r4,r4,r0,lsr#2 + tst r1,#1<<31 + eor r5,r5,r7,lsl#27 + eor r4,r4,r7,lsr#5 + eorne r5,r5,r0,lsl#31 + eorne r4,r4,r0,lsr#1 + eor r5,r5,r6,lsl#30 + eor r4,r4,r6,lsr#2 + + mov pc,lr +.size mul_1x1_ialu,.-mul_1x1_ialu +.global bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,%function +.align 5 +bn_GF2m_mul_2x2: +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap +.Lpic: ldr r12,[pc,r12] + tst r12,#1 + beq .Lialu + + veor d18,d18 + vmov.32 d19,r3,r3 @ two copies of b1 + vmov.32 d18[0],r1 @ a1 + + veor d20,d20 + vld1.32 d21[],[sp,:32] @ two copies of b0 + vmov.32 d20[0],r2 @ a0 + mov r12,lr + + vmov d16,d18 + vmov d17,d19 + bl mul_1x1_neon @ a1·b1 + vmov d22,d0 + + vmov d16,d20 + vmov d17,d21 + bl mul_1x1_neon @ a0·b0 + vmov d23,d0 + + veor d16,d20,d18 + veor d17,d21,d19 + veor d20,d23,d22 + bl mul_1x1_neon @ (a0+a1)·(b0+b1) + + veor d0,d20 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 + vshl.u64 d1,d0,#32 + vshr.u64 d0,d0,#32 + veor d23,d1 + veor d22,d0 + vst1.32 {d23[0]},[r0,:32]! + vst1.32 {d23[1]},[r0,:32]! + vst1.32 {d22[0]},[r0,:32]! + vst1.32 {d22[1]},[r0,:32] + bx r12 +.align 4 +.Lialu: +#endif + stmdb sp!,{r4-r10,lr} + mov r10,r0 @ reassign 1st argument + mov r0,r3 @ r0=b1 + ldr r3,[sp,#32] @ load b0 + mov r12,#7<<2 + sub sp,sp,#32 @ allocate tab[8] + + bl mul_1x1_ialu @ a1·b1 + str r5,[r10,#8] + str r4,[r10,#12] + + eor r0,r0,r3 @ flip b0 and b1 + eor r1,r1,r2 @ flip a0 and a1 + eor r3,r3,r0 + eor r2,r2,r1 + eor r0,r0,r3 + eor r1,r1,r2 + bl mul_1x1_ialu @ a0·b0 + str r5,[r10] + str r4,[r10,#4] + + eor r1,r1,r2 + eor r0,r0,r3 + bl mul_1x1_ialu @ (a1+a0)·(b1+b0) + ldmia r10,{r6-r9} + eor r5,r5,r4 + eor r4,r4,r7 + eor r5,r5,r6 + eor r4,r4,r8 + eor r5,r5,r9 + eor r4,r4,r9 + str r4,[r10,#8] + eor r5,r5,r4 + add sp,sp,#32 @ destroy tab[8] + str r5,[r10,#4] + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r10,pc} +#else + ldmia sp!,{r4-r10,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +#if __ARM_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-(.Lpic+8) +#endif +.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " +.align 5 + +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/bn/asm/armv4-gf2m.pl b/app/openssl/crypto/bn/asm/armv4-gf2m.pl new file mode 100644 index 00000000..22ad1f85 --- /dev/null +++ b/app/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -0,0 +1,278 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# May 2011 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication +# used in bn_gf2m.c. It's kind of low-hanging mechanical port from +# C for the time being... Except that it has two code paths: pure +# integer code suitable for any ARMv4 and later CPU and NEON code +# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs +# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50% +# faster than compiler-generated code. For ECDH and ECDSA verify (but +# not for ECDSA sign) it means 25%-45% improvement depending on key +# length, more for longer keys. Even though NEON 1x1 multiplication +# runs in even less cycles, ~30, improvement is measurable only on +# longer keys. One has to optimize code elsewhere to get NEON glow... + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } +sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } + +$code=<<___; +#include "arm_arch.h" + +.text +.code 32 + +#if __ARM_ARCH__>=7 +.fpu neon + +.type mul_1x1_neon,%function +.align 5 +mul_1x1_neon: + vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a + vmull.p8 `&Q("d0")`,d16,d17 @ a·bb + vshl.u64 `&Dlo("q2")`,d16,#16 + vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb + vshl.u64 `&Dlo("q3")`,d16,#24 + vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb + vshr.u64 `&Dlo("q1")`,#8 + vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb + vshl.u64 `&Dhi("q1")`,#24 + veor d0,`&Dlo("q1")` + vshr.u64 `&Dlo("q2")`,#16 + veor d0,`&Dhi("q1")` + vshl.u64 `&Dhi("q2")`,#16 + veor d0,`&Dlo("q2")` + vshr.u64 `&Dlo("q3")`,#24 + veor d0,`&Dhi("q2")` + vshl.u64 `&Dhi("q3")`,#8 + veor d0,`&Dlo("q3")` + veor d0,`&Dhi("q3")` + bx lr +.size mul_1x1_neon,.-mul_1x1_neon +#endif +___ +################ +# private interface to mul_1x1_ialu +# +$a="r1"; +$b="r0"; + +($a0,$a1,$a2,$a12,$a4,$a14)= +($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); + +$mask="r12"; + +$code.=<<___; +.type mul_1x1_ialu,%function +.align 5 +mul_1x1_ialu: + mov $a0,#0 + bic $a1,$a,#3<<30 @ a1=a&0x3fffffff + str $a0,[sp,#0] @ tab[0]=0 + add $a2,$a1,$a1 @ a2=a1<<1 + str $a1,[sp,#4] @ tab[1]=a1 + eor $a12,$a1,$a2 @ a1^a2 + str $a2,[sp,#8] @ tab[2]=a2 + mov $a4,$a1,lsl#2 @ a4=a1<<2 + str $a12,[sp,#12] @ tab[3]=a1^a2 + eor $a14,$a1,$a4 @ a1^a4 + str $a4,[sp,#16] @ tab[4]=a4 + eor $a0,$a2,$a4 @ a2^a4 + str $a14,[sp,#20] @ tab[5]=a1^a4 + eor $a12,$a12,$a4 @ a1^a2^a4 + str $a0,[sp,#24] @ tab[6]=a2^a4 + and $i0,$mask,$b,lsl#2 + str $a12,[sp,#28] @ tab[7]=a1^a2^a4 + + and $i1,$mask,$b,lsr#1 + ldr $lo,[sp,$i0] @ tab[b & 0x7] + and $i0,$mask,$b,lsr#4 + ldr $t1,[sp,$i1] @ tab[b >> 3 & 0x7] + and $i1,$mask,$b,lsr#7 + ldr $t0,[sp,$i0] @ tab[b >> 6 & 0x7] + eor $lo,$lo,$t1,lsl#3 @ stall + mov $hi,$t1,lsr#29 + ldr $t1,[sp,$i1] @ tab[b >> 9 & 0x7] + + and $i0,$mask,$b,lsr#10 + eor $lo,$lo,$t0,lsl#6 + eor $hi,$hi,$t0,lsr#26 + ldr $t0,[sp,$i0] @ tab[b >> 12 & 0x7] + + and $i1,$mask,$b,lsr#13 + eor $lo,$lo,$t1,lsl#9 + eor $hi,$hi,$t1,lsr#23 + ldr $t1,[sp,$i1] @ tab[b >> 15 & 0x7] + + and $i0,$mask,$b,lsr#16 + eor $lo,$lo,$t0,lsl#12 + eor $hi,$hi,$t0,lsr#20 + ldr $t0,[sp,$i0] @ tab[b >> 18 & 0x7] + + and $i1,$mask,$b,lsr#19 + eor $lo,$lo,$t1,lsl#15 + eor $hi,$hi,$t1,lsr#17 + ldr $t1,[sp,$i1] @ tab[b >> 21 & 0x7] + + and $i0,$mask,$b,lsr#22 + eor $lo,$lo,$t0,lsl#18 + eor $hi,$hi,$t0,lsr#14 + ldr $t0,[sp,$i0] @ tab[b >> 24 & 0x7] + + and $i1,$mask,$b,lsr#25 + eor $lo,$lo,$t1,lsl#21 + eor $hi,$hi,$t1,lsr#11 + ldr $t1,[sp,$i1] @ tab[b >> 27 & 0x7] + + tst $a,#1<<30 + and $i0,$mask,$b,lsr#28 + eor $lo,$lo,$t0,lsl#24 + eor $hi,$hi,$t0,lsr#8 + ldr $t0,[sp,$i0] @ tab[b >> 30 ] + + eorne $lo,$lo,$b,lsl#30 + eorne $hi,$hi,$b,lsr#2 + tst $a,#1<<31 + eor $lo,$lo,$t1,lsl#27 + eor $hi,$hi,$t1,lsr#5 + eorne $lo,$lo,$b,lsl#31 + eorne $hi,$hi,$b,lsr#1 + eor $lo,$lo,$t0,lsl#30 + eor $hi,$hi,$t0,lsr#2 + + mov pc,lr +.size mul_1x1_ialu,.-mul_1x1_ialu +___ +################ +# void bn_GF2m_mul_2x2(BN_ULONG *r, +# BN_ULONG a1,BN_ULONG a0, +# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 + +($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); + +$code.=<<___; +.global bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,%function +.align 5 +bn_GF2m_mul_2x2: +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap +.Lpic: ldr r12,[pc,r12] + tst r12,#1 + beq .Lialu + + veor $A1,$A1 + vmov.32 $B1,r3,r3 @ two copies of b1 + vmov.32 ${A1}[0],r1 @ a1 + + veor $A0,$A0 + vld1.32 ${B0}[],[sp,:32] @ two copies of b0 + vmov.32 ${A0}[0],r2 @ a0 + mov r12,lr + + vmov d16,$A1 + vmov d17,$B1 + bl mul_1x1_neon @ a1·b1 + vmov $A1B1,d0 + + vmov d16,$A0 + vmov d17,$B0 + bl mul_1x1_neon @ a0·b0 + vmov $A0B0,d0 + + veor d16,$A0,$A1 + veor d17,$B0,$B1 + veor $A0,$A0B0,$A1B1 + bl mul_1x1_neon @ (a0+a1)·(b0+b1) + + veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 + vshl.u64 d1,d0,#32 + vshr.u64 d0,d0,#32 + veor $A0B0,d1 + veor $A1B1,d0 + vst1.32 {${A0B0}[0]},[r0,:32]! + vst1.32 {${A0B0}[1]},[r0,:32]! + vst1.32 {${A1B1}[0]},[r0,:32]! + vst1.32 {${A1B1}[1]},[r0,:32] + bx r12 +.align 4 +.Lialu: +#endif +___ +$ret="r10"; # reassigned 1st argument +$code.=<<___; + stmdb sp!,{r4-r10,lr} + mov $ret,r0 @ reassign 1st argument + mov $b,r3 @ $b=b1 + ldr r3,[sp,#32] @ load b0 + mov $mask,#7<<2 + sub sp,sp,#32 @ allocate tab[8] + + bl mul_1x1_ialu @ a1·b1 + str $lo,[$ret,#8] + str $hi,[$ret,#12] + + eor $b,$b,r3 @ flip b0 and b1 + eor $a,$a,r2 @ flip a0 and a1 + eor r3,r3,$b + eor r2,r2,$a + eor $b,$b,r3 + eor $a,$a,r2 + bl mul_1x1_ialu @ a0·b0 + str $lo,[$ret] + str $hi,[$ret,#4] + + eor $a,$a,r2 + eor $b,$b,r3 + bl mul_1x1_ialu @ (a1+a0)·(b1+b0) +___ +@r=map("r$_",(6..9)); +$code.=<<___; + ldmia $ret,{@r[0]-@r[3]} + eor $lo,$lo,$hi + eor $hi,$hi,@r[1] + eor $lo,$lo,@r[0] + eor $hi,$hi,@r[2] + eor $lo,$lo,@r[3] + eor $hi,$hi,@r[3] + str $hi,[$ret,#8] + eor $lo,$lo,$hi + add sp,sp,#32 @ destroy tab[8] + str $lo,[$ret,#4] + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r10,pc} +#else + ldmia sp!,{r4-r10,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +#if __ARM_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-(.Lpic+8) +#endif +.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " +.align 5 + +.comm OPENSSL_armcap_P,4,4 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; +close STDOUT; # enforce flush diff --git a/app/openssl/crypto/bn/asm/armv4-mont.S b/app/openssl/crypto/bn/asm/armv4-mont.S new file mode 120000 index 00000000..eb5cd951 --- /dev/null +++ b/app/openssl/crypto/bn/asm/armv4-mont.S @@ -0,0 +1 @@ +armv4-mont.s \ No newline at end of file diff --git a/app/openssl/crypto/bn/asm/armv4-mont.pl b/app/openssl/crypto/bn/asm/armv4-mont.pl index 14e0d2d1..f78a8b5f 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.pl +++ b/app/openssl/crypto/bn/asm/armv4-mont.pl @@ -23,6 +23,9 @@ # than 1/2KB. Windows CE port would be trivial, as it's exclusively # about decorations, ABI and instruction syntax are identical. +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + $num="r0"; # starts as num argument, but holds &tp[num-1] $ap="r1"; $bp="r2"; $bi="r2"; $rp="r2"; @@ -89,9 +92,9 @@ bn_mul_mont: .L1st: ldr $aj,[$ap],#4 @ ap[j],ap++ mov $alo,$ahi + ldr $nj,[$np],#4 @ np[j],np++ mov $ahi,#0 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] - ldr $nj,[$np],#4 @ np[j],np++ mov $nhi,#0 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 adds $nlo,$nlo,$alo @@ -101,21 +104,21 @@ bn_mul_mont: bne .L1st adds $nlo,$nlo,$ahi + ldr $tp,[$_bp] @ restore bp mov $nhi,#0 + ldr $n0,[$_n0] @ restore n0 adc $nhi,$nhi,#0 - ldr $tp,[$_bp] @ restore bp str $nlo,[$num] @ tp[num-1]= - ldr $n0,[$_n0] @ restore n0 str $nhi,[$num,#4] @ tp[num]= .Louter: sub $tj,$num,sp @ "original" $num-1 value sub $ap,$ap,$tj @ "rewind" ap to &ap[1] - sub $np,$np,$tj @ "rewind" np to &np[1] ldr $bi,[$tp,#4]! @ *(++bp) + sub $np,$np,$tj @ "rewind" np to &np[1] ldr $aj,[$ap,#-4] @ ap[0] - ldr $nj,[$np,#-4] @ np[0] ldr $alo,[sp] @ tp[0] + ldr $nj,[$np,#-4] @ np[0] ldr $tj,[sp,#4] @ tp[1] mov $ahi,#0 @@ -129,13 +132,13 @@ bn_mul_mont: .Linner: ldr $aj,[$ap],#4 @ ap[j],ap++ adds $alo,$ahi,$tj @ +=tp[j] + ldr $nj,[$np],#4 @ np[j],np++ mov $ahi,#0 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] - ldr $nj,[$np],#4 @ np[j],np++ mov $nhi,#0 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 - ldr $tj,[$tp,#8] @ tp[j+1] adc $ahi,$ahi,#0 + ldr $tj,[$tp,#8] @ tp[j+1] adds $nlo,$nlo,$alo str $nlo,[$tp],#4 @ tp[j-1]=,tp++ adc $nlo,$nhi,#0 @@ -144,13 +147,13 @@ bn_mul_mont: adds $nlo,$nlo,$ahi mov $nhi,#0 + ldr $tp,[$_bp] @ restore bp adc $nhi,$nhi,#0 + ldr $n0,[$_n0] @ restore n0 adds $nlo,$nlo,$tj - adc $nhi,$nhi,#0 - ldr $tp,[$_bp] @ restore bp ldr $tj,[$_bpend] @ restore &bp[num] + adc $nhi,$nhi,#0 str $nlo,[$num] @ tp[num-1]= - ldr $n0,[$_n0] @ restore n0 str $nhi,[$num,#4] @ tp[num]= cmp $tp,$tj diff --git a/app/openssl/crypto/bn/asm/armv4-mont.s b/app/openssl/crypto/bn/asm/armv4-mont.s index 0488455f..64c220b5 100644 --- a/app/openssl/crypto/bn/asm/armv4-mont.s +++ b/app/openssl/crypto/bn/asm/armv4-mont.s @@ -38,9 +38,9 @@ bn_mul_mont: .L1st: ldr r5,[r1],#4 @ ap[j],ap++ mov r10,r11 + ldr r6,[r3],#4 @ np[j],np++ mov r11,#0 umlal r10,r11,r5,r2 @ ap[j]*bp[0] - ldr r6,[r3],#4 @ np[j],np++ mov r14,#0 umlal r12,r14,r6,r8 @ np[j]*n0 adds r12,r12,r10 @@ -50,21 +50,21 @@ bn_mul_mont: bne .L1st adds r12,r12,r11 + ldr r4,[r0,#13*4] @ restore bp mov r14,#0 + ldr r8,[r0,#14*4] @ restore n0 adc r14,r14,#0 - ldr r4,[r0,#13*4] @ restore bp str r12,[r0] @ tp[num-1]= - ldr r8,[r0,#14*4] @ restore n0 str r14,[r0,#4] @ tp[num]= .Louter: sub r7,r0,sp @ "original" r0-1 value sub r1,r1,r7 @ "rewind" ap to &ap[1] - sub r3,r3,r7 @ "rewind" np to &np[1] ldr r2,[r4,#4]! @ *(++bp) + sub r3,r3,r7 @ "rewind" np to &np[1] ldr r5,[r1,#-4] @ ap[0] - ldr r6,[r3,#-4] @ np[0] ldr r10,[sp] @ tp[0] + ldr r6,[r3,#-4] @ np[0] ldr r7,[sp,#4] @ tp[1] mov r11,#0 @@ -78,13 +78,13 @@ bn_mul_mont: .Linner: ldr r5,[r1],#4 @ ap[j],ap++ adds r10,r11,r7 @ +=tp[j] + ldr r6,[r3],#4 @ np[j],np++ mov r11,#0 umlal r10,r11,r5,r2 @ ap[j]*bp[i] - ldr r6,[r3],#4 @ np[j],np++ mov r14,#0 umlal r12,r14,r6,r8 @ np[j]*n0 - ldr r7,[r4,#8] @ tp[j+1] adc r11,r11,#0 + ldr r7,[r4,#8] @ tp[j+1] adds r12,r12,r10 str r12,[r4],#4 @ tp[j-1]=,tp++ adc r12,r14,#0 @@ -93,13 +93,13 @@ bn_mul_mont: adds r12,r12,r11 mov r14,#0 + ldr r4,[r0,#13*4] @ restore bp adc r14,r14,#0 + ldr r8,[r0,#14*4] @ restore n0 adds r12,r12,r7 - adc r14,r14,#0 - ldr r4,[r0,#13*4] @ restore bp ldr r7,[r0,#15*4] @ restore &bp[num] + adc r14,r14,#0 str r12,[r0] @ tp[num-1]= - ldr r8,[r0,#14*4] @ restore n0 str r14,[r0,#4] @ tp[num]= cmp r4,r7 diff --git a/app/openssl/crypto/bn/asm/bn-586.S b/app/openssl/crypto/bn/asm/bn-586.S new file mode 100644 index 00000000..66695e26 --- /dev/null +++ b/app/openssl/crypto/bn/asm/bn-586.S @@ -0,0 +1,1533 @@ +.file "crypto/bn/asm/bn-586.s" +.text +.globl bn_mul_add_words +.type bn_mul_add_words,@function +.align 16 +bn_mul_add_words: +.L_bn_mul_add_words_begin: + call .L000PIC_me_up +.L000PIC_me_up: + popl %eax + leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%eax),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc .L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp .L002maw_sse2_entry +.align 16 +.L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz .L004maw_sse2_exit +.L002maw_sse2_entry: + testl $4294967288,%ecx + jnz .L003maw_sse2_unrolled +.align 4 +.L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L005maw_sse2_loop +.L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 16 +.L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz .L006maw_finish +.align 16 +.L007maw_loop: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz .L007maw_loop +.L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz .L008maw_finish2 + jmp .L009maw_end +.L008maw_finish2: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_add_words,.-.L_bn_mul_add_words_begin +.globl bn_mul_words +.type bn_mul_words,@function +.align 16 +bn_mul_words: +.L_bn_mul_words_begin: + call .L010PIC_me_up +.L010PIC_me_up: + popl %eax + leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%eax),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc .L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 16 +.L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 16 +.L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz .L013mw_finish +.L014mw_loop: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz .L013mw_finish + jmp .L014mw_loop +.L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz .L015mw_finish2 + jmp .L016mw_end +.L015mw_finish2: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_words,.-.L_bn_mul_words_begin +.globl bn_sqr_words +.type bn_sqr_words,@function +.align 16 +bn_sqr_words: +.L_bn_sqr_words_begin: + call .L017PIC_me_up +.L017PIC_me_up: + popl %eax + leal _GLOBAL_OFFSET_TABLE_+[.-.L017PIC_me_up](%eax),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc .L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 16 +.L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz .L019sqr_sse2_loop + emms + ret +.align 16 +.L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz .L020sw_finish +.L021sw_loop: + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz .L021sw_loop +.L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz .L022sw_end + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz .L022sw_end + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz .L022sw_end + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz .L022sw_end + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz .L022sw_end + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz .L022sw_end + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz .L022sw_end + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +.L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sqr_words,.-.L_bn_sqr_words_begin +.globl bn_div_words +.type bn_div_words,@function +.align 16 +bn_div_words: +.L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.size bn_div_words,.-.L_bn_div_words_begin +.globl bn_add_words +.type bn_add_words,@function +.align 16 +bn_add_words: +.L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L023aw_finish +.L024aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L024aw_loop +.L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L025aw_end + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L025aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L025aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L025aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L025aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L025aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L025aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_add_words,.-.L_bn_add_words_begin +.globl bn_sub_words +.type bn_sub_words,@function +.align 16 +bn_sub_words: +.L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L026aw_finish +.L027aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L027aw_loop +.L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L028aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L028aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L028aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L028aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L028aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L028aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_words,.-.L_bn_sub_words_begin +.globl bn_sub_part_words +.type bn_sub_part_words,@function +.align 16 +bn_sub_part_words: +.L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L029aw_finish +.L030aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L030aw_loop +.L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +.L031aw_end: + cmpl $0,36(%esp) + je .L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je .L032pw_end + jge .L033pw_pos + + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz .L034pw_neg_finish +.L035pw_neg_loop: + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L035pw_neg_loop +.L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz .L032pw_end + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp .L032pw_end +.L033pw_pos: + andl $4294967288,%ebp + jz .L036pw_pos_finish +.L037pw_pos_loop: + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L038pw_nc0 + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L039pw_nc1 + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L040pw_nc2 + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L041pw_nc3 + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L042pw_nc4 + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L043pw_nc5 + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L044pw_nc6 + + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc .L045pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L037pw_pos_loop +.L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz .L032pw_end + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L046pw_tail_nc0 + decl %ebp + jz .L032pw_end + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L047pw_tail_nc1 + decl %ebp + jz .L032pw_end + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L048pw_tail_nc2 + decl %ebp + jz .L032pw_end + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L049pw_tail_nc3 + decl %ebp + jz .L032pw_end + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L050pw_tail_nc4 + decl %ebp + jz .L032pw_end + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L051pw_tail_nc5 + decl %ebp + jz .L032pw_end + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L052pw_tail_nc6 + movl $1,%eax + jmp .L032pw_end +.L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +.L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +.L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz .L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +.L046pw_tail_nc0: + decl %ebp + jz .L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L047pw_tail_nc1: + decl %ebp + jz .L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L048pw_tail_nc2: + decl %ebp + jz .L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L049pw_tail_nc3: + decl %ebp + jz .L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L050pw_tail_nc4: + decl %ebp + jz .L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L051pw_tail_nc5: + decl %ebp + jz .L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L052pw_tail_nc6: +.L054pw_nc_end: + movl $0,%eax +.L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_part_words,.-.L_bn_sub_part_words_begin +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/bn/asm/bn-mips.S b/app/openssl/crypto/bn/asm/bn-mips.S new file mode 100644 index 00000000..2e7cccb7 --- /dev/null +++ b/app/openssl/crypto/bn/asm/bn-mips.S @@ -0,0 +1,2175 @@ +.set mips2 +.rdata +.asciiz "mips3.s, Version 1.2" +.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov " + +.text +.set noat + +.align 5 +.globl bn_mul_add_words +.ent bn_mul_add_words +bn_mul_add_words: + .set noreorder + bgtz $6,bn_mul_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_add_words + +.align 5 +.ent bn_mul_add_words_internal +bn_mul_add_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + lw $12,0($5) + multu $12,$7 + lw $13,0($4) + lw $14,4($5) + lw $15,4($4) + lw $8,2*4($5) + lw $9,2*4($4) + addu $13,$2 + sltu $2,$13,$2 # All manuals say it "compares 32-bit + # values", but it seems to work fine + # even on 64-bit registers. + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + multu $14,$7 + sltu $1,$13,$1 + sw $13,0($4) + addu $2,$1 + + lw $10,3*4($5) + lw $11,3*4($4) + addu $15,$2 + sltu $2,$15,$2 + mflo $1 + mfhi $14 + addu $15,$1 + addu $2,$14 + multu $8,$7 + sltu $1,$15,$1 + sw $15,4($4) + addu $2,$1 + + subu $6,4 + addu $4,4*4 + addu $5,4*4 + addu $9,$2 + sltu $2,$9,$2 + mflo $1 + mfhi $8 + addu $9,$1 + addu $2,$8 + multu $10,$7 + sltu $1,$9,$1 + sw $9,-2*4($4) + addu $2,$1 + + + and $8,$6,$3 + addu $11,$2 + sltu $2,$11,$2 + mflo $1 + mfhi $10 + addu $11,$1 + addu $2,$10 + sltu $1,$11,$1 + sw $11,-4($4) + .set noreorder + bgtz $8,.L_bn_mul_add_words_loop + addu $2,$1 + + beqz $6,.L_bn_mul_add_words_return + nop + +.L_bn_mul_add_words_tail: + .set reorder + lw $12,0($5) + multu $12,$7 + lw $13,0($4) + subu $6,1 + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,0($4) + addu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + lw $12,4($5) + multu $12,$7 + lw $13,4($4) + subu $6,1 + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,4($4) + addu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + lw $12,2*4($5) + multu $12,$7 + lw $13,2*4($4) + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,2*4($4) + addu $2,$1 + +.L_bn_mul_add_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_add_words_internal + +.align 5 +.globl bn_mul_words +.ent bn_mul_words +bn_mul_words: + .set noreorder + bgtz $6,bn_mul_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_words + +.align 5 +.ent bn_mul_words_internal +bn_mul_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + lw $12,0($5) + multu $12,$7 + lw $14,4($5) + lw $8,2*4($5) + lw $10,3*4($5) + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + multu $14,$7 + sw $2,0($4) + addu $2,$13,$12 + + subu $6,4 + addu $4,4*4 + addu $5,4*4 + mflo $1 + mfhi $14 + addu $2,$1 + sltu $15,$2,$1 + multu $8,$7 + sw $2,-3*4($4) + addu $2,$15,$14 + + mflo $1 + mfhi $8 + addu $2,$1 + sltu $9,$2,$1 + multu $10,$7 + sw $2,-2*4($4) + addu $2,$9,$8 + + and $8,$6,$3 + mflo $1 + mfhi $10 + addu $2,$1 + sltu $11,$2,$1 + sw $2,-4($4) + .set noreorder + bgtz $8,.L_bn_mul_words_loop + addu $2,$11,$10 + + beqz $6,.L_bn_mul_words_return + nop + +.L_bn_mul_words_tail: + .set reorder + lw $12,0($5) + multu $12,$7 + subu $6,1 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,0($4) + addu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + lw $12,4($5) + multu $12,$7 + subu $6,1 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,4($4) + addu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + lw $12,2*4($5) + multu $12,$7 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,2*4($4) + addu $2,$13,$12 + +.L_bn_mul_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_words_internal + +.align 5 +.globl bn_sqr_words +.ent bn_sqr_words +bn_sqr_words: + .set noreorder + bgtz $6,bn_sqr_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_sqr_words + +.align 5 +.ent bn_sqr_words_internal +bn_sqr_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + lw $12,0($5) + multu $12,$12 + lw $14,4($5) + lw $8,2*4($5) + lw $10,3*4($5) + mflo $13 + mfhi $12 + sw $13,0($4) + sw $12,4($4) + + multu $14,$14 + subu $6,4 + addu $4,8*4 + addu $5,4*4 + mflo $15 + mfhi $14 + sw $15,-6*4($4) + sw $14,-5*4($4) + + multu $8,$8 + mflo $9 + mfhi $8 + sw $9,-4*4($4) + sw $8,-3*4($4) + + + multu $10,$10 + and $8,$6,$3 + mflo $11 + mfhi $10 + sw $11,-2*4($4) + + .set noreorder + bgtz $8,.L_bn_sqr_words_loop + sw $10,-4($4) + + beqz $6,.L_bn_sqr_words_return + nop + +.L_bn_sqr_words_tail: + .set reorder + lw $12,0($5) + multu $12,$12 + subu $6,1 + mflo $13 + mfhi $12 + sw $13,0($4) + sw $12,4($4) + beqz $6,.L_bn_sqr_words_return + + lw $12,4($5) + multu $12,$12 + subu $6,1 + mflo $13 + mfhi $12 + sw $13,2*4($4) + sw $12,3*4($4) + beqz $6,.L_bn_sqr_words_return + + lw $12,2*4($5) + multu $12,$12 + mflo $13 + mfhi $12 + sw $13,4*4($4) + sw $12,5*4($4) + +.L_bn_sqr_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_sqr_words_internal + +.align 5 +.globl bn_add_words +.ent bn_add_words +bn_add_words: + .set noreorder + bgtz $7,bn_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_add_words + +.align 5 +.ent bn_add_words_internal +bn_add_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_add_words_tail + +.L_bn_add_words_loop: + lw $12,0($5) + lw $8,0($6) + subu $7,4 + lw $13,4($5) + and $1,$7,$3 + lw $14,2*4($5) + addu $6,4*4 + lw $15,3*4($5) + addu $4,4*4 + lw $9,-3*4($6) + addu $5,4*4 + lw $10,-2*4($6) + lw $11,-4($6) + addu $8,$12 + sltu $24,$8,$12 + addu $12,$8,$2 + sltu $2,$12,$8 + sw $12,-4*4($4) + addu $2,$24 + + addu $9,$13 + sltu $25,$9,$13 + addu $13,$9,$2 + sltu $2,$13,$9 + sw $13,-3*4($4) + addu $2,$25 + + addu $10,$14 + sltu $24,$10,$14 + addu $14,$10,$2 + sltu $2,$14,$10 + sw $14,-2*4($4) + addu $2,$24 + + addu $11,$15 + sltu $25,$11,$15 + addu $15,$11,$2 + sltu $2,$15,$11 + sw $15,-4($4) + + .set noreorder + bgtz $1,.L_bn_add_words_loop + addu $2,$25 + + beqz $7,.L_bn_add_words_return + nop + +.L_bn_add_words_tail: + .set reorder + lw $12,0($5) + lw $8,0($6) + addu $8,$12 + subu $7,1 + sltu $24,$8,$12 + addu $12,$8,$2 + sltu $2,$12,$8 + sw $12,0($4) + addu $2,$24 + beqz $7,.L_bn_add_words_return + + lw $13,4($5) + lw $9,4($6) + addu $9,$13 + subu $7,1 + sltu $25,$9,$13 + addu $13,$9,$2 + sltu $2,$13,$9 + sw $13,4($4) + addu $2,$25 + beqz $7,.L_bn_add_words_return + + lw $14,2*4($5) + lw $10,2*4($6) + addu $10,$14 + sltu $24,$10,$14 + addu $14,$10,$2 + sltu $2,$14,$10 + sw $14,2*4($4) + addu $2,$24 + +.L_bn_add_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_add_words_internal + +.align 5 +.globl bn_sub_words +.ent bn_sub_words +bn_sub_words: + .set noreorder + bgtz $7,bn_sub_words_internal + move $2,$0 + jr $31 + move $4,$0 +.end bn_sub_words + +.align 5 +.ent bn_sub_words_internal +bn_sub_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + lw $12,0($5) + lw $8,0($6) + subu $7,4 + lw $13,4($5) + and $1,$7,$3 + lw $14,2*4($5) + addu $6,4*4 + lw $15,3*4($5) + addu $4,4*4 + lw $9,-3*4($6) + addu $5,4*4 + lw $10,-2*4($6) + lw $11,-4($6) + sltu $24,$12,$8 + subu $8,$12,$8 + subu $12,$8,$2 + sgtu $2,$12,$8 + sw $12,-4*4($4) + addu $2,$24 + + sltu $25,$13,$9 + subu $9,$13,$9 + subu $13,$9,$2 + sgtu $2,$13,$9 + sw $13,-3*4($4) + addu $2,$25 + + + sltu $24,$14,$10 + subu $10,$14,$10 + subu $14,$10,$2 + sgtu $2,$14,$10 + sw $14,-2*4($4) + addu $2,$24 + + sltu $25,$15,$11 + subu $11,$15,$11 + subu $15,$11,$2 + sgtu $2,$15,$11 + sw $15,-4($4) + + .set noreorder + bgtz $1,.L_bn_sub_words_loop + addu $2,$25 + + beqz $7,.L_bn_sub_words_return + nop + +.L_bn_sub_words_tail: + .set reorder + lw $12,0($5) + lw $8,0($6) + subu $7,1 + sltu $24,$12,$8 + subu $8,$12,$8 + subu $12,$8,$2 + sgtu $2,$12,$8 + sw $12,0($4) + addu $2,$24 + beqz $7,.L_bn_sub_words_return + + lw $13,4($5) + subu $7,1 + lw $9,4($6) + sltu $25,$13,$9 + subu $9,$13,$9 + subu $13,$9,$2 + sgtu $2,$13,$9 + sw $13,4($4) + addu $2,$25 + beqz $7,.L_bn_sub_words_return + + lw $14,2*4($5) + lw $10,2*4($6) + sltu $24,$14,$10 + subu $10,$14,$10 + subu $14,$10,$2 + sgtu $2,$14,$10 + sw $14,2*4($4) + addu $2,$24 + +.L_bn_sub_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_sub_words_internal + +.align 5 +.globl bn_div_3_words +.ent bn_div_3_words +bn_div_3_words: + .set noreorder + move $7,$4 # we know that bn_div_words does not + # touch $7, $10, $11 and preserves $6 + # so that we can save two arguments + # and return address in registers + # instead of stack:-) + + lw $4,($7) + move $10,$5 + bne $4,$6,bn_div_3_words_internal + lw $5,-4($7) + li $2,-1 + jr $31 + move $4,$2 +.end bn_div_3_words + +.align 5 +.ent bn_div_3_words_internal +bn_div_3_words_internal: + .set reorder + move $11,$31 + bal bn_div_words_internal + move $31,$11 + multu $10,$2 + lw $14,-2*4($7) + move $8,$0 + mfhi $13 + mflo $12 + sltu $24,$13,$5 +.L_bn_div_3_words_inner_loop: + bnez $24,.L_bn_div_3_words_inner_loop_done + sgeu $1,$14,$12 + seq $25,$13,$5 + and $1,$25 + sltu $15,$12,$10 + addu $5,$6 + subu $13,$15 + subu $12,$10 + sltu $24,$13,$5 + sltu $8,$5,$6 + or $24,$8 + .set noreorder + beqz $1,.L_bn_div_3_words_inner_loop + subu $2,1 + addu $2,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + .set noreorder + jr $31 + move $4,$2 +.end bn_div_3_words_internal + +.align 5 +.globl bn_div_words +.ent bn_div_words +bn_div_words: + .set noreorder + bnez $6,bn_div_words_internal + li $2,-1 # I would rather signal div-by-zero + # which can be done with 'break 7' + jr $31 + move $4,$2 +.end bn_div_words + +.align 5 +.ent bn_div_words_internal +bn_div_words_internal: + move $3,$0 + bltz $6,.L_bn_div_words_body + move $25,$3 + sll $6,1 + bgtz $6,.-4 + addu $25,1 + + .set reorder + negu $13,$25 + li $14,-1 + sll $14,$13 + and $14,$4 + srl $1,$5,$13 + .set noreorder + beqz $14,.+12 + nop + break 6 # signal overflow + .set reorder + sll $4,$25 + sll $5,$25 + or $4,$1 +.L_bn_div_words_body: + srl $3,$6,4*4 # bits + sgeu $1,$4,$6 + .set noreorder + beqz $1,.+12 + nop + subu $4,$6 + .set reorder + + li $8,-1 + srl $9,$4,4*4 # bits + srl $8,4*4 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div1 + divu $0,$4,$3 + mflo $8 +.L_bn_div_words_skip_div1: + multu $6,$8 + sll $15,$4,4*4 # bits + srl $1,$5,4*4 # bits + or $15,$1 + mflo $12 + mfhi $13 +.L_bn_div_words_inner_loop1: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $2,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop1_done + subu $13,$2 + subu $12,$6 + b .L_bn_div_words_inner_loop1 + subu $8,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + sll $5,4*4 # bits + subu $4,$15,$12 + sll $2,$8,4*4 # bits + + li $8,-1 + srl $9,$4,4*4 # bits + srl $8,4*4 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div2 + divu $0,$4,$3 + mflo $8 +.L_bn_div_words_skip_div2: + multu $6,$8 + sll $15,$4,4*4 # bits + srl $1,$5,4*4 # bits + or $15,$1 + mflo $12 + mfhi $13 +.L_bn_div_words_inner_loop2: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $3,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop2_done + subu $13,$3 + subu $12,$6 + b .L_bn_div_words_inner_loop2 + subu $8,1 + .set reorder +.L_bn_div_words_inner_loop2_done: + + subu $4,$15,$12 + or $2,$8 + srl $3,$4,$25 # $3 contains remainder if anybody wants it + srl $6,$25 # restore $6 + + .set noreorder + move $5,$3 + jr $31 + move $4,$2 +.end bn_div_words_internal + +.align 5 +.globl bn_mul_comba8 +.ent bn_mul_comba8 +bn_mul_comba8: + .set noreorder + .frame $29,6*4,$31 + .mask 0x003f0000,-4 + subu $29,6*4 + sw $21,5*4($29) + sw $20,4*4($29) + sw $19,3*4($29) + sw $18,2*4($29) + sw $17,1*4($29) + sw $16,0*4($29) + + .set reorder + lw $12,0($5) # If compiled with -mips3 option on + # R5000 box assembler barks on this + # 1ine with "should not have mult/div + # as last instruction in bb (R10K + # bug)" warning. If anybody out there + # has a clue about how to circumvent + # this do send me a note. + # + + lw $8,0($6) + lw $13,4($5) + lw $14,2*4($5) + multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $15,3*4($5) + lw $9,4($6) + lw $10,2*4($6) + lw $11,3*4($6) + mflo $2 + mfhi $3 + + lw $16,4*4($5) + lw $18,5*4($5) + multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); + lw $20,6*4($5) + lw $5,7*4($5) + lw $17,4*4($6) + lw $19,5*4($6) + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); + addu $7,$25,$1 + lw $21,6*4($6) + lw $6,7*4($6) + sw $2,0($4) # r[0]=c1; + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + sw $3,4($4) # r[1]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) # r[2]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) # r[3]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) # r[4]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) # r[5]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,6*4($4) # r[6]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,7*4($4) # r[7]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,8*4($4) # r[8]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,9*4($4) # r[9]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,10*4($4) # r[10]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,11*4($4) # r[11]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,12*4($4) # r[12]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,13*4($4) # r[13]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sw $7,14*4($4) # r[14]=c3; + sw $2,15*4($4) # r[15]=c1; + + .set noreorder + lw $21,5*4($29) + lw $20,4*4($29) + lw $19,3*4($29) + lw $18,2*4($29) + lw $17,1*4($29) + lw $16,0*4($29) + jr $31 + addu $29,6*4 +.end bn_mul_comba8 + +.align 5 +.globl bn_mul_comba4 +.ent bn_mul_comba4 +bn_mul_comba4: + .set reorder + lw $12,0($5) + lw $8,0($6) + lw $13,4($5) + lw $14,2*4($5) + multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $15,3*4($5) + lw $9,4($6) + lw $10,2*4($6) + lw $11,3*4($6) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); + addu $7,$25,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + sw $3,4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sw $2,6*4($4) + sw $3,7*4($4) + + .set noreorder + jr $31 + nop +.end bn_mul_comba4 + +.align 5 +.globl bn_sqr_comba8 +.ent bn_sqr_comba8 +bn_sqr_comba8: + .set reorder + lw $12,0($5) + lw $13,4($5) + lw $14,2*4($5) + lw $15,3*4($5) + + multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $8,4*4($5) + lw $9,5*4($5) + lw $10,6*4($5) + lw $11,7*4($5) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $7,$25,$1 + sw $3,4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); + addu $3,$1 + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,6*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,7*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,8*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,9*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,10*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,11*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,12*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,13*4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sw $7,14*4($4) + sw $2,15*4($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba8 + +.align 5 +.globl bn_sqr_comba4 +.ent bn_sqr_comba4 +bn_sqr_comba4: + .set reorder + lw $12,0($5) + lw $13,4($5) + multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $14,2*4($5) + lw $15,3*4($5) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $7,$25,$1 + sw $3,4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $13,$14 # mul_add_c(a2[1],b[2],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sw $2,6*4($4) + sw $3,7*4($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba4 diff --git a/app/openssl/crypto/bn/asm/co-586.S b/app/openssl/crypto/bn/asm/co-586.S new file mode 100644 index 00000000..3cb80735 --- /dev/null +++ b/app/openssl/crypto/bn/asm/co-586.S @@ -0,0 +1,1254 @@ +.file "crypto/bn/asm/co-586.s" +.text +.globl bn_mul_comba8 +.type bn_mul_comba8,@function +.align 16 +bn_mul_comba8: +.L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + + + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba8,.-.L_bn_mul_comba8_begin +.globl bn_mul_comba4 +.type bn_mul_comba4,@function +.align 16 +bn_mul_comba4: +.L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + + + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba4,.-.L_bn_mul_comba4_begin +.globl bn_sqr_comba8 +.type bn_sqr_comba8,@function +.align 16 +bn_sqr_comba8: +.L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + + + xorl %ecx,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin +.globl bn_sqr_comba4 +.type bn_sqr_comba4,@function +.align 16 +bn_sqr_comba4: +.L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin diff --git a/app/openssl/crypto/bn/asm/ia64-mont.pl b/app/openssl/crypto/bn/asm/ia64-mont.pl new file mode 100644 index 00000000..e2586584 --- /dev/null +++ b/app/openssl/crypto/bn/asm/ia64-mont.pl @@ -0,0 +1,851 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# January 2010 +# +# "Teaser" Montgomery multiplication module for IA-64. There are +# several possibilities for improvement: +# +# - modulo-scheduling outer loop would eliminate quite a number of +# stalls after ldf8, xma and getf.sig outside inner loop and +# improve shorter key performance; +# - shorter vector support [with input vectors being fetched only +# once] should be added; +# - 2x unroll with help of n0[1] would make the code scalable on +# "wider" IA-64, "wider" than Itanium 2 that is, which is not of +# acute interest, because upcoming Tukwila's individual cores are +# reportedly based on Itanium 2 design; +# - dedicated squaring procedure(?); +# +# January 2010 +# +# Shorter vector support is implemented by zero-padding ap and np +# vectors up to 8 elements, or 512 bits. This means that 256-bit +# inputs will be processed only 2 times faster than 512-bit inputs, +# not 4 [as one would expect, because algorithm complexity is n^2]. +# The reason for padding is that inputs shorter than 512 bits won't +# be processed faster anyway, because minimal critical path of the +# core loop happens to match 512-bit timing. Either way, it resulted +# in >100% improvement of 512-bit RSA sign benchmark and 50% - of +# 1024-bit one [in comparison to original version of *this* module]. +# +# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with* +# this module is: +# sign verify sign/s verify/s +# rsa 512 bits 0.000290s 0.000024s 3452.8 42031.4 +# rsa 1024 bits 0.000793s 0.000058s 1261.7 17172.0 +# rsa 2048 bits 0.005908s 0.000148s 169.3 6754.0 +# rsa 4096 bits 0.033456s 0.000469s 29.9 2133.6 +# dsa 512 bits 0.000253s 0.000198s 3949.9 5057.0 +# dsa 1024 bits 0.000585s 0.000607s 1708.4 1647.4 +# dsa 2048 bits 0.001453s 0.001703s 688.1 587.4 +# +# ... and *without* (but still with ia64.S): +# +# rsa 512 bits 0.000670s 0.000041s 1491.8 24145.5 +# rsa 1024 bits 0.001988s 0.000080s 502.9 12499.3 +# rsa 2048 bits 0.008702s 0.000189s 114.9 5293.9 +# rsa 4096 bits 0.043860s 0.000533s 22.8 1875.9 +# dsa 512 bits 0.000441s 0.000427s 2265.3 2340.6 +# dsa 1024 bits 0.000823s 0.000867s 1215.6 1153.2 +# dsa 2048 bits 0.001894s 0.002179s 528.1 458.9 +# +# As it can be seen, RSA sign performance improves by 130-30%, +# hereafter less for longer keys, while verify - by 74-13%. +# DSA performance improves by 115-30%. + +if ($^O eq "hpux") { + $ADDP="addp4"; + for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } +} else { $ADDP="add"; } + +$code=<<___; +.explicit +.text + +// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap, +// const BN_ULONG *bp,const BN_ULONG *np, +// const BN_ULONG *n0p,int num); +.align 64 +.global bn_mul_mont# +.proc bn_mul_mont# +bn_mul_mont: + .prologue + .body +{ .mmi; cmp4.le p6,p7=2,r37;; +(p6) cmp4.lt.unc p8,p9=8,r37 + mov ret0=r0 };; +{ .bbb; +(p9) br.cond.dptk.many bn_mul_mont_8 +(p8) br.cond.dpnt.many bn_mul_mont_general +(p7) br.ret.spnt.many b0 };; +.endp bn_mul_mont# + +prevfs=r2; prevpr=r3; prevlc=r10; prevsp=r11; + +rptr=r8; aptr=r9; bptr=r14; nptr=r15; +tptr=r16; // &tp[0] +tp_1=r17; // &tp[-1] +num=r18; len=r19; lc=r20; +topbit=r21; // carry bit from tmp[num] + +n0=f6; +m0=f7; +bi=f8; + +.align 64 +.local bn_mul_mont_general# +.proc bn_mul_mont_general# +bn_mul_mont_general: + .prologue +{ .mmi; .save ar.pfs,prevfs + alloc prevfs=ar.pfs,6,2,0,8 + $ADDP aptr=0,in1 + .save ar.lc,prevlc + mov prevlc=ar.lc } +{ .mmi; .vframe prevsp + mov prevsp=sp + $ADDP bptr=0,in2 + .save pr,prevpr + mov prevpr=pr };; + + .body + .rotf alo[6],nlo[4],ahi[8],nhi[6] + .rotr a[3],n[3],t[2] + +{ .mmi; ldf8 bi=[bptr],8 // (*bp++) + ldf8 alo[4]=[aptr],16 // ap[0] + $ADDP r30=8,in1 };; +{ .mmi; ldf8 alo[3]=[r30],16 // ap[1] + ldf8 alo[2]=[aptr],16 // ap[2] + $ADDP in4=0,in4 };; +{ .mmi; ldf8 alo[1]=[r30] // ap[3] + ldf8 n0=[in4] // n0 + $ADDP rptr=0,in0 } +{ .mmi; $ADDP nptr=0,in3 + mov r31=16 + zxt4 num=in5 };; +{ .mmi; ldf8 nlo[2]=[nptr],8 // np[0] + shladd len=num,3,r0 + shladd r31=num,3,r31 };; +{ .mmi; ldf8 nlo[1]=[nptr],8 // np[1] + add lc=-5,num + sub r31=sp,r31 };; +{ .mfb; and sp=-16,r31 // alloca + xmpy.hu ahi[2]=alo[4],bi // ap[0]*bp[0] + nop.b 0 } +{ .mfb; nop.m 0 + xmpy.lu alo[4]=alo[4],bi + brp.loop.imp .L1st_ctop,.L1st_cend-16 + };; +{ .mfi; nop.m 0 + xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[0] + add tp_1=8,sp } +{ .mfi; nop.m 0 + xma.lu alo[3]=alo[3],bi,ahi[2] + mov pr.rot=0x20001f<<16 + // ------^----- (p40) at first (p23) + // ----------^^ p[16:20]=1 + };; +{ .mfi; nop.m 0 + xmpy.lu m0=alo[4],n0 // (ap[0]*bp[0])*n0 + mov ar.lc=lc } +{ .mfi; nop.m 0 + fcvt.fxu.s1 nhi[1]=f0 + mov ar.ec=8 };; + +.align 32 +.L1st_ctop: +.pred.rel "mutex",p40,p42 +{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++) + (p18) xma.hu ahi[0]=alo[2],bi,ahi[1] + (p40) add n[2]=n[2],a[2] } // (p23) } +{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)(p16) + (p18) xma.lu alo[2]=alo[2],bi,ahi[1] + (p42) add n[2]=n[2],a[2],1 };; // (p23) +{ .mfi; (p21) getf.sig a[0]=alo[5] + (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1] + (p42) cmp.leu p41,p39=n[2],a[2] } // (p23) +{ .mfi; (p23) st8 [tp_1]=n[2],8 + (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1] + (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23) +{ .mmb; (p21) getf.sig n[0]=nlo[3] + (p16) nop.m 0 + br.ctop.sptk .L1st_ctop };; +.L1st_cend: + +{ .mmi; getf.sig a[0]=ahi[6] // (p24) + getf.sig n[0]=nhi[4] + add num=-1,num };; // num-- +{ .mmi; .pred.rel "mutex",p40,p42 +(p40) add n[0]=n[0],a[0] +(p42) add n[0]=n[0],a[0],1 + sub aptr=aptr,len };; // rewind +{ .mmi; .pred.rel "mutex",p40,p42 +(p40) cmp.ltu p41,p39=n[0],a[0] +(p42) cmp.leu p41,p39=n[0],a[0] + sub nptr=nptr,len };; +{ .mmi; .pred.rel "mutex",p39,p41 +(p39) add topbit=r0,r0 +(p41) add topbit=r0,r0,1 + nop.i 0 } +{ .mmi; st8 [tp_1]=n[0] + add tptr=16,sp + add tp_1=8,sp };; + +.Louter: +{ .mmi; ldf8 bi=[bptr],8 // (*bp++) + ldf8 ahi[3]=[tptr] // tp[0] + add r30=8,aptr };; +{ .mmi; ldf8 alo[4]=[aptr],16 // ap[0] + ldf8 alo[3]=[r30],16 // ap[1] + add r31=8,nptr };; +{ .mfb; ldf8 alo[2]=[aptr],16 // ap[2] + xma.hu ahi[2]=alo[4],bi,ahi[3] // ap[0]*bp[i]+tp[0] + brp.loop.imp .Linner_ctop,.Linner_cend-16 + } +{ .mfb; ldf8 alo[1]=[r30] // ap[3] + xma.lu alo[4]=alo[4],bi,ahi[3] + clrrrb.pr };; +{ .mfi; ldf8 nlo[2]=[nptr],16 // np[0] + xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[i] + nop.i 0 } +{ .mfi; ldf8 nlo[1]=[r31] // np[1] + xma.lu alo[3]=alo[3],bi,ahi[2] + mov pr.rot=0x20101f<<16 + // ------^----- (p40) at first (p23) + // --------^--- (p30) at first (p22) + // ----------^^ p[16:20]=1 + };; +{ .mfi; st8 [tptr]=r0 // tp[0] is already accounted + xmpy.lu m0=alo[4],n0 // (ap[0]*bp[i]+tp[0])*n0 + mov ar.lc=lc } +{ .mfi; + fcvt.fxu.s1 nhi[1]=f0 + mov ar.ec=8 };; + +// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in +// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7 +// in latter case accounts for two-tick pipeline stall, which means +// that its performance would be ~20% lower than optimal one. No +// attempt was made to address this, because original Itanium is +// hardly represented out in the wild... +.align 32 +.Linner_ctop: +.pred.rel "mutex",p40,p42 +.pred.rel "mutex",p30,p32 +{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++) + (p18) xma.hu ahi[0]=alo[2],bi,ahi[1] + (p40) add n[2]=n[2],a[2] } // (p23) +{ .mfi; (p16) nop.m 0 + (p18) xma.lu alo[2]=alo[2],bi,ahi[1] + (p42) add n[2]=n[2],a[2],1 };; // (p23) +{ .mfi; (p21) getf.sig a[0]=alo[5] + (p16) nop.f 0 + (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23) +{ .mfi; (p21) ld8 t[0]=[tptr],8 + (p16) nop.f 0 + (p42) cmp.leu p41,p39=n[2],a[2] };; // (p23) +{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++) + (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1] + (p30) add a[1]=a[1],t[1] } // (p22) +{ .mfi; (p16) nop.m 0 + (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1] + (p32) add a[1]=a[1],t[1],1 };; // (p22) +{ .mmi; (p21) getf.sig n[0]=nlo[3] + (p16) nop.m 0 + (p30) cmp.ltu p31,p29=a[1],t[1] } // (p22) +{ .mmb; (p23) st8 [tp_1]=n[2],8 + (p32) cmp.leu p31,p29=a[1],t[1] // (p22) + br.ctop.sptk .Linner_ctop };; +.Linner_cend: + +{ .mmi; getf.sig a[0]=ahi[6] // (p24) + getf.sig n[0]=nhi[4] + nop.i 0 };; + +{ .mmi; .pred.rel "mutex",p31,p33 +(p31) add a[0]=a[0],topbit +(p33) add a[0]=a[0],topbit,1 + mov topbit=r0 };; +{ .mfi; .pred.rel "mutex",p31,p33 +(p31) cmp.ltu p32,p30=a[0],topbit +(p33) cmp.leu p32,p30=a[0],topbit + } +{ .mfi; .pred.rel "mutex",p40,p42 +(p40) add n[0]=n[0],a[0] +(p42) add n[0]=n[0],a[0],1 + };; +{ .mmi; .pred.rel "mutex",p44,p46 +(p40) cmp.ltu p41,p39=n[0],a[0] +(p42) cmp.leu p41,p39=n[0],a[0] +(p32) add topbit=r0,r0,1 } + +{ .mmi; st8 [tp_1]=n[0],8 + cmp4.ne p6,p0=1,num + sub aptr=aptr,len };; // rewind +{ .mmi; sub nptr=nptr,len +(p41) add topbit=r0,r0,1 + add tptr=16,sp } +{ .mmb; add tp_1=8,sp + add num=-1,num // num-- +(p6) br.cond.sptk.many .Louter };; + +{ .mbb; add lc=4,lc + brp.loop.imp .Lsub_ctop,.Lsub_cend-16 + clrrrb.pr };; +{ .mii; nop.m 0 + mov pr.rot=0x10001<<16 + // ------^---- (p33) at first (p17) + mov ar.lc=lc } +{ .mii; nop.m 0 + mov ar.ec=3 + nop.i 0 };; + +.Lsub_ctop: +.pred.rel "mutex",p33,p35 +{ .mfi; (p16) ld8 t[0]=[tptr],8 // t=*(tp++) + (p16) nop.f 0 + (p33) sub n[1]=t[1],n[1] } // (p17) +{ .mfi; (p16) ld8 n[0]=[nptr],8 // n=*(np++) + (p16) nop.f 0 + (p35) sub n[1]=t[1],n[1],1 };; // (p17) +{ .mib; (p18) st8 [rptr]=n[2],8 // *(rp++)=r + (p33) cmp.gtu p34,p32=n[1],t[1] // (p17) + (p18) nop.b 0 } +{ .mib; (p18) nop.m 0 + (p35) cmp.geu p34,p32=n[1],t[1] // (p17) + br.ctop.sptk .Lsub_ctop };; +.Lsub_cend: + +{ .mmb; .pred.rel "mutex",p34,p36 +(p34) sub topbit=topbit,r0 // (p19) +(p36) sub topbit=topbit,r0,1 + brp.loop.imp .Lcopy_ctop,.Lcopy_cend-16 + } +{ .mmb; sub rptr=rptr,len // rewind + sub tptr=tptr,len + clrrrb.pr };; +{ .mmi; and aptr=tptr,topbit + andcm bptr=rptr,topbit + mov pr.rot=1<<16 };; +{ .mii; or nptr=aptr,bptr + mov ar.lc=lc + mov ar.ec=3 };; + +.Lcopy_ctop: +{ .mmb; (p16) ld8 n[0]=[nptr],8 + (p18) st8 [tptr]=r0,8 + (p16) nop.b 0 } +{ .mmb; (p16) nop.m 0 + (p18) st8 [rptr]=n[2],8 + br.ctop.sptk .Lcopy_ctop };; +.Lcopy_cend: + +{ .mmi; mov ret0=1 // signal "handled" + rum 1<<5 // clear um.mfh + mov ar.lc=prevlc } +{ .mib; .restore sp + mov sp=prevsp + mov pr=prevpr,0x1ffff + br.ret.sptk.many b0 };; +.endp bn_mul_mont_general# + +a1=r16; a2=r17; a3=r18; a4=r19; a5=r20; a6=r21; a7=r22; a8=r23; +n1=r24; n2=r25; n3=r26; n4=r27; n5=r28; n6=r29; n7=r30; n8=r31; +t0=r15; + +ai0=f8; ai1=f9; ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15; +ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23; + +.align 64 +.skip 48 // aligns loop body +.local bn_mul_mont_8# +.proc bn_mul_mont_8# +bn_mul_mont_8: + .prologue +{ .mmi; .save ar.pfs,prevfs + alloc prevfs=ar.pfs,6,2,0,8 + .vframe prevsp + mov prevsp=sp + .save ar.lc,prevlc + mov prevlc=ar.lc } +{ .mmi; add r17=-6*16,sp + add sp=-7*16,sp + .save pr,prevpr + mov prevpr=pr };; + +{ .mmi; .save.gf 0,0x10 + stf.spill [sp]=f16,-16 + .save.gf 0,0x20 + stf.spill [r17]=f17,32 + add r16=-5*16,prevsp};; +{ .mmi; .save.gf 0,0x40 + stf.spill [r16]=f18,32 + .save.gf 0,0x80 + stf.spill [r17]=f19,32 + $ADDP aptr=0,in1 };; +{ .mmi; .save.gf 0,0x100 + stf.spill [r16]=f20,32 + .save.gf 0,0x200 + stf.spill [r17]=f21,32 + $ADDP r29=8,in1 };; +{ .mmi; .save.gf 0,0x400 + stf.spill [r16]=f22 + .save.gf 0,0x800 + stf.spill [r17]=f23 + $ADDP rptr=0,in0 };; + + .body + .rotf bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10] + .rotr t[8] + +// load input vectors padding them to 8 elements +{ .mmi; ldf8 ai0=[aptr],16 // ap[0] + ldf8 ai1=[r29],16 // ap[1] + $ADDP bptr=0,in2 } +{ .mmi; $ADDP r30=8,in2 + $ADDP nptr=0,in3 + $ADDP r31=8,in3 };; +{ .mmi; ldf8 bj[7]=[bptr],16 // bp[0] + ldf8 bj[6]=[r30],16 // bp[1] + cmp4.le p4,p5=3,in5 } +{ .mmi; ldf8 ni0=[nptr],16 // np[0] + ldf8 ni1=[r31],16 // np[1] + cmp4.le p6,p7=4,in5 };; + +{ .mfi; (p4)ldf8 ai2=[aptr],16 // ap[2] + (p5)fcvt.fxu ai2=f0 + cmp4.le p8,p9=5,in5 } +{ .mfi; (p6)ldf8 ai3=[r29],16 // ap[3] + (p7)fcvt.fxu ai3=f0 + cmp4.le p10,p11=6,in5 } +{ .mfi; (p4)ldf8 bj[5]=[bptr],16 // bp[2] + (p5)fcvt.fxu bj[5]=f0 + cmp4.le p12,p13=7,in5 } +{ .mfi; (p6)ldf8 bj[4]=[r30],16 // bp[3] + (p7)fcvt.fxu bj[4]=f0 + cmp4.le p14,p15=8,in5 } +{ .mfi; (p4)ldf8 ni2=[nptr],16 // np[2] + (p5)fcvt.fxu ni2=f0 + addp4 r28=-1,in5 } +{ .mfi; (p6)ldf8 ni3=[r31],16 // np[3] + (p7)fcvt.fxu ni3=f0 + $ADDP in4=0,in4 };; + +{ .mfi; ldf8 n0=[in4] + fcvt.fxu tf[1]=f0 + nop.i 0 } + +{ .mfi; (p8)ldf8 ai4=[aptr],16 // ap[4] + (p9)fcvt.fxu ai4=f0 + mov t[0]=r0 } +{ .mfi; (p10)ldf8 ai5=[r29],16 // ap[5] + (p11)fcvt.fxu ai5=f0 + mov t[1]=r0 } +{ .mfi; (p8)ldf8 bj[3]=[bptr],16 // bp[4] + (p9)fcvt.fxu bj[3]=f0 + mov t[2]=r0 } +{ .mfi; (p10)ldf8 bj[2]=[r30],16 // bp[5] + (p11)fcvt.fxu bj[2]=f0 + mov t[3]=r0 } +{ .mfi; (p8)ldf8 ni4=[nptr],16 // np[4] + (p9)fcvt.fxu ni4=f0 + mov t[4]=r0 } +{ .mfi; (p10)ldf8 ni5=[r31],16 // np[5] + (p11)fcvt.fxu ni5=f0 + mov t[5]=r0 };; + +{ .mfi; (p12)ldf8 ai6=[aptr],16 // ap[6] + (p13)fcvt.fxu ai6=f0 + mov t[6]=r0 } +{ .mfi; (p14)ldf8 ai7=[r29],16 // ap[7] + (p15)fcvt.fxu ai7=f0 + mov t[7]=r0 } +{ .mfi; (p12)ldf8 bj[1]=[bptr],16 // bp[6] + (p13)fcvt.fxu bj[1]=f0 + mov ar.lc=r28 } +{ .mfi; (p14)ldf8 bj[0]=[r30],16 // bp[7] + (p15)fcvt.fxu bj[0]=f0 + mov ar.ec=1 } +{ .mfi; (p12)ldf8 ni6=[nptr],16 // np[6] + (p13)fcvt.fxu ni6=f0 + mov pr.rot=1<<16 } +{ .mfb; (p14)ldf8 ni7=[r31],16 // np[7] + (p15)fcvt.fxu ni7=f0 + brp.loop.imp .Louter_8_ctop,.Louter_8_cend-16 + };; + +// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt +// to measure with help of Interval Time Counter indicated that the +// factor is a tad higher: 33 or 34, if not 35. Exact measurement and +// addressing the issue is problematic, because I don't have access +// to platform-specific instruction-level profiler. On Itanium it +// should run in 56*n ticks, because of higher xma latency... +.Louter_8_ctop: + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mfi; (p16) nop.m 0 // 0: + (p16) xma.hu ahi[0]=ai0,bj[7],tf[1] // ap[0]*b[i]+t[0] + (p40) add a3=a3,n3 } // (p17) a3+=n3 +{ .mfi; (p42) add a3=a3,n3,1 + (p16) xma.lu alo[0]=ai0,bj[7],tf[1] + (p16) nop.i 0 };; +{ .mii; (p17) getf.sig a7=alo[8] // 1: + (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3 + (p50) add t[6]=t[6],a3,1 };; +{ .mfi; (p17) getf.sig a8=ahi[8] // 2: + (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0 + (p40) cmp.ltu p43,p41=a3,n3 } +{ .mfi; (p42) cmp.leu p43,p41=a3,n3 + (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6] + (p16) nop.i 0 };; +{ .mii; (p17) getf.sig n5=nlo[6] // 3: + (p48) cmp.ltu p51,p49=t[6],a3 + (p50) cmp.leu p51,p49=t[6],a3 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mfi; (p16) nop.m 0 // 4: + (p16) xma.hu ahi[1]=ai1,bj[7],ahi[0] // ap[1]*b[i] + (p41) add a4=a4,n4 } // (p17) a4+=n4 +{ .mfi; (p43) add a4=a4,n4,1 + (p16) xma.lu alo[1]=ai1,bj[7],ahi[0] + (p16) nop.i 0 };; +{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4 + (p16) xmpy.lu mj[0]=alo[0],n0 // (ap[0]*b[i]+t[0])*n0 + (p51) add t[5]=t[5],a4,1 };; +{ .mfi; (p16) nop.m 0 // 6: + (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0 + (p41) cmp.ltu p42,p40=a4,n4 } +{ .mfi; (p43) cmp.leu p42,p40=a4,n4 + (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7] + (p16) nop.i 0 };; +{ .mii; (p17) getf.sig n6=nlo[7] // 7: + (p49) cmp.ltu p50,p48=t[5],a4 + (p51) cmp.leu p50,p48=t[5],a4 };; + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mfi; (p16) nop.m 0 // 8: + (p16) xma.hu ahi[2]=ai2,bj[7],ahi[1] // ap[2]*b[i] + (p40) add a5=a5,n5 } // (p17) a5+=n5 +{ .mfi; (p42) add a5=a5,n5,1 + (p16) xma.lu alo[2]=ai2,bj[7],ahi[1] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig a1=alo[1] // 9: + (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5 + (p50) add t[4]=t[4],a5,1 };; +{ .mfi; (p16) nop.m 0 // 10: + (p16) xma.hu nhi[0]=ni0,mj[0],alo[0] // np[0]*m0 + (p40) cmp.ltu p43,p41=a5,n5 } +{ .mfi; (p42) cmp.leu p43,p41=a5,n5 + (p16) xma.lu nlo[0]=ni0,mj[0],alo[0] + (p16) nop.i 0 };; +{ .mii; (p17) getf.sig n7=nlo[8] // 11: + (p48) cmp.ltu p51,p49=t[4],a5 + (p50) cmp.leu p51,p49=t[4],a5 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mfi; (p17) getf.sig n8=nhi[8] // 12: + (p16) xma.hu ahi[3]=ai3,bj[7],ahi[2] // ap[3]*b[i] + (p41) add a6=a6,n6 } // (p17) a6+=n6 +{ .mfi; (p43) add a6=a6,n6,1 + (p16) xma.lu alo[3]=ai3,bj[7],ahi[2] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig a2=alo[2] // 13: + (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6 + (p51) add t[3]=t[3],a6,1 };; +{ .mfi; (p16) nop.m 0 // 14: + (p16) xma.hu nhi[1]=ni1,mj[0],nhi[0] // np[1]*m0 + (p41) cmp.ltu p42,p40=a6,n6 } +{ .mfi; (p43) cmp.leu p42,p40=a6,n6 + (p16) xma.lu nlo[1]=ni1,mj[0],nhi[0] + (p16) nop.i 0 };; +{ .mii; (p16) nop.m 0 // 15: + (p49) cmp.ltu p50,p48=t[3],a6 + (p51) cmp.leu p50,p48=t[3],a6 };; + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mfi; (p16) nop.m 0 // 16: + (p16) xma.hu ahi[4]=ai4,bj[7],ahi[3] // ap[4]*b[i] + (p40) add a7=a7,n7 } // (p17) a7+=n7 +{ .mfi; (p42) add a7=a7,n7,1 + (p16) xma.lu alo[4]=ai4,bj[7],ahi[3] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig a3=alo[3] // 17: + (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7 + (p50) add t[2]=t[2],a7,1 };; +{ .mfi; (p16) nop.m 0 // 18: + (p16) xma.hu nhi[2]=ni2,mj[0],nhi[1] // np[2]*m0 + (p40) cmp.ltu p43,p41=a7,n7 } +{ .mfi; (p42) cmp.leu p43,p41=a7,n7 + (p16) xma.lu nlo[2]=ni2,mj[0],nhi[1] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig n1=nlo[1] // 19: + (p48) cmp.ltu p51,p49=t[2],a7 + (p50) cmp.leu p51,p49=t[2],a7 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mfi; (p16) nop.m 0 // 20: + (p16) xma.hu ahi[5]=ai5,bj[7],ahi[4] // ap[5]*b[i] + (p41) add a8=a8,n8 } // (p17) a8+=n8 +{ .mfi; (p43) add a8=a8,n8,1 + (p16) xma.lu alo[5]=ai5,bj[7],ahi[4] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig a4=alo[4] // 21: + (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8 + (p51) add t[1]=t[1],a8,1 };; +{ .mfi; (p16) nop.m 0 // 22: + (p16) xma.hu nhi[3]=ni3,mj[0],nhi[2] // np[3]*m0 + (p41) cmp.ltu p42,p40=a8,n8 } +{ .mfi; (p43) cmp.leu p42,p40=a8,n8 + (p16) xma.lu nlo[3]=ni3,mj[0],nhi[2] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig n2=nlo[2] // 23: + (p49) cmp.ltu p50,p48=t[1],a8 + (p51) cmp.leu p50,p48=t[1],a8 };; +{ .mfi; (p16) nop.m 0 // 24: + (p16) xma.hu ahi[6]=ai6,bj[7],ahi[5] // ap[6]*b[i] + (p16) add a1=a1,n1 } // (p16) a1+=n1 +{ .mfi; (p16) nop.m 0 + (p16) xma.lu alo[6]=ai6,bj[7],ahi[5] + (p17) mov t[0]=r0 };; +{ .mii; (p16) getf.sig a5=alo[5] // 25: + (p16) add t0=t[7],a1 // (p16) t[7]+=a1 + (p42) add t[0]=t[0],r0,1 };; +{ .mfi; (p16) setf.sig tf[0]=t0 // 26: + (p16) xma.hu nhi[4]=ni4,mj[0],nhi[3] // np[4]*m0 + (p50) add t[0]=t[0],r0,1 } +{ .mfi; (p16) cmp.ltu.unc p42,p40=a1,n1 + (p16) xma.lu nlo[4]=ni4,mj[0],nhi[3] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig n3=nlo[3] // 27: + (p16) cmp.ltu.unc p50,p48=t0,a1 + (p16) nop.i 0 };; + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mfi; (p16) nop.m 0 // 28: + (p16) xma.hu ahi[7]=ai7,bj[7],ahi[6] // ap[7]*b[i] + (p40) add a2=a2,n2 } // (p16) a2+=n2 +{ .mfi; (p42) add a2=a2,n2,1 + (p16) xma.lu alo[7]=ai7,bj[7],ahi[6] + (p16) nop.i 0 };; +{ .mii; (p16) getf.sig a6=alo[6] // 29: + (p48) add t[6]=t[6],a2 // (p16) t[6]+=a2 + (p50) add t[6]=t[6],a2,1 };; +{ .mfi; (p16) nop.m 0 // 30: + (p16) xma.hu nhi[5]=ni5,mj[0],nhi[4] // np[5]*m0 + (p40) cmp.ltu p41,p39=a2,n2 } +{ .mfi; (p42) cmp.leu p41,p39=a2,n2 + (p16) xma.lu nlo[5]=ni5,mj[0],nhi[4] + (p16) nop.i 0 };; +{ .mfi; (p16) getf.sig n4=nlo[4] // 31: + (p16) nop.f 0 + (p48) cmp.ltu p49,p47=t[6],a2 } +{ .mfb; (p50) cmp.leu p49,p47=t[6],a2 + (p16) nop.f 0 + br.ctop.sptk.many .Louter_8_ctop };; +.Louter_8_cend: + +// above loop has to execute one more time, without (p16), which is +// replaced with merged move of np[8] to GPR bank + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mmi; (p0) getf.sig n1=ni0 // 0: + (p40) add a3=a3,n3 // (p17) a3+=n3 + (p42) add a3=a3,n3,1 };; +{ .mii; (p17) getf.sig a7=alo[8] // 1: + (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3 + (p50) add t[6]=t[6],a3,1 };; +{ .mfi; (p17) getf.sig a8=ahi[8] // 2: + (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0 + (p40) cmp.ltu p43,p41=a3,n3 } +{ .mfi; (p42) cmp.leu p43,p41=a3,n3 + (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6] + (p0) nop.i 0 };; +{ .mii; (p17) getf.sig n5=nlo[6] // 3: + (p48) cmp.ltu p51,p49=t[6],a3 + (p50) cmp.leu p51,p49=t[6],a3 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mmi; (p0) getf.sig n2=ni1 // 4: + (p41) add a4=a4,n4 // (p17) a4+=n4 + (p43) add a4=a4,n4,1 };; +{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4 + (p0) nop.f 0 + (p51) add t[5]=t[5],a4,1 };; +{ .mfi; (p0) getf.sig n3=ni2 // 6: + (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0 + (p41) cmp.ltu p42,p40=a4,n4 } +{ .mfi; (p43) cmp.leu p42,p40=a4,n4 + (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7] + (p0) nop.i 0 };; +{ .mii; (p17) getf.sig n6=nlo[7] // 7: + (p49) cmp.ltu p50,p48=t[5],a4 + (p51) cmp.leu p50,p48=t[5],a4 };; + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mii; (p0) getf.sig n4=ni3 // 8: + (p40) add a5=a5,n5 // (p17) a5+=n5 + (p42) add a5=a5,n5,1 };; +{ .mii; (p0) nop.m 0 // 9: + (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5 + (p50) add t[4]=t[4],a5,1 };; +{ .mii; (p0) nop.m 0 // 10: + (p40) cmp.ltu p43,p41=a5,n5 + (p42) cmp.leu p43,p41=a5,n5 };; +{ .mii; (p17) getf.sig n7=nlo[8] // 11: + (p48) cmp.ltu p51,p49=t[4],a5 + (p50) cmp.leu p51,p49=t[4],a5 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mii; (p17) getf.sig n8=nhi[8] // 12: + (p41) add a6=a6,n6 // (p17) a6+=n6 + (p43) add a6=a6,n6,1 };; +{ .mii; (p0) getf.sig n5=ni4 // 13: + (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6 + (p51) add t[3]=t[3],a6,1 };; +{ .mii; (p0) nop.m 0 // 14: + (p41) cmp.ltu p42,p40=a6,n6 + (p43) cmp.leu p42,p40=a6,n6 };; +{ .mii; (p0) getf.sig n6=ni5 // 15: + (p49) cmp.ltu p50,p48=t[3],a6 + (p51) cmp.leu p50,p48=t[3],a6 };; + .pred.rel "mutex",p40,p42 + .pred.rel "mutex",p48,p50 +{ .mii; (p0) nop.m 0 // 16: + (p40) add a7=a7,n7 // (p17) a7+=n7 + (p42) add a7=a7,n7,1 };; +{ .mii; (p0) nop.m 0 // 17: + (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7 + (p50) add t[2]=t[2],a7,1 };; +{ .mii; (p0) nop.m 0 // 18: + (p40) cmp.ltu p43,p41=a7,n7 + (p42) cmp.leu p43,p41=a7,n7 };; +{ .mii; (p0) getf.sig n7=ni6 // 19: + (p48) cmp.ltu p51,p49=t[2],a7 + (p50) cmp.leu p51,p49=t[2],a7 };; + .pred.rel "mutex",p41,p43 + .pred.rel "mutex",p49,p51 +{ .mii; (p0) nop.m 0 // 20: + (p41) add a8=a8,n8 // (p17) a8+=n8 + (p43) add a8=a8,n8,1 };; +{ .mmi; (p0) nop.m 0 // 21: + (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8 + (p51) add t[1]=t[1],a8,1 } +{ .mmi; (p17) mov t[0]=r0 + (p41) cmp.ltu p42,p40=a8,n8 + (p43) cmp.leu p42,p40=a8,n8 };; +{ .mmi; (p0) getf.sig n8=ni7 // 22: + (p49) cmp.ltu p50,p48=t[1],a8 + (p51) cmp.leu p50,p48=t[1],a8 } +{ .mmi; (p42) add t[0]=t[0],r0,1 + (p0) add r16=-7*16,prevsp + (p0) add r17=-6*16,prevsp };; + +// subtract np[8] from carrybit|tmp[8] +// carrybit|tmp[8] layout upon exit from above loop is: +// t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant) +{ .mmi; (p50)add t[0]=t[0],r0,1 + add r18=-5*16,prevsp + sub n1=t0,n1 };; +{ .mmi; cmp.gtu p34,p32=n1,t0;; + .pred.rel "mutex",p32,p34 + (p32)sub n2=t[7],n2 + (p34)sub n2=t[7],n2,1 };; +{ .mii; (p32)cmp.gtu p35,p33=n2,t[7] + (p34)cmp.geu p35,p33=n2,t[7];; + .pred.rel "mutex",p33,p35 + (p33)sub n3=t[6],n3 } +{ .mmi; (p35)sub n3=t[6],n3,1;; + (p33)cmp.gtu p34,p32=n3,t[6] + (p35)cmp.geu p34,p32=n3,t[6] };; + .pred.rel "mutex",p32,p34 +{ .mii; (p32)sub n4=t[5],n4 + (p34)sub n4=t[5],n4,1;; + (p32)cmp.gtu p35,p33=n4,t[5] } +{ .mmi; (p34)cmp.geu p35,p33=n4,t[5];; + .pred.rel "mutex",p33,p35 + (p33)sub n5=t[4],n5 + (p35)sub n5=t[4],n5,1 };; +{ .mii; (p33)cmp.gtu p34,p32=n5,t[4] + (p35)cmp.geu p34,p32=n5,t[4];; + .pred.rel "mutex",p32,p34 + (p32)sub n6=t[3],n6 } +{ .mmi; (p34)sub n6=t[3],n6,1;; + (p32)cmp.gtu p35,p33=n6,t[3] + (p34)cmp.geu p35,p33=n6,t[3] };; + .pred.rel "mutex",p33,p35 +{ .mii; (p33)sub n7=t[2],n7 + (p35)sub n7=t[2],n7,1;; + (p33)cmp.gtu p34,p32=n7,t[2] } +{ .mmi; (p35)cmp.geu p34,p32=n7,t[2];; + .pred.rel "mutex",p32,p34 + (p32)sub n8=t[1],n8 + (p34)sub n8=t[1],n8,1 };; +{ .mii; (p32)cmp.gtu p35,p33=n8,t[1] + (p34)cmp.geu p35,p33=n8,t[1];; + .pred.rel "mutex",p33,p35 + (p33)sub a8=t[0],r0 } +{ .mmi; (p35)sub a8=t[0],r0,1;; + (p33)cmp.gtu p34,p32=a8,t[0] + (p35)cmp.geu p34,p32=a8,t[0] };; + +// save the result, either tmp[num] or tmp[num]-np[num] + .pred.rel "mutex",p32,p34 +{ .mmi; (p32)st8 [rptr]=n1,8 + (p34)st8 [rptr]=t0,8 + add r19=-4*16,prevsp};; +{ .mmb; (p32)st8 [rptr]=n2,8 + (p34)st8 [rptr]=t[7],8 + (p5)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n3,8 + (p34)st8 [rptr]=t[6],8 + (p7)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n4,8 + (p34)st8 [rptr]=t[5],8 + (p9)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n5,8 + (p34)st8 [rptr]=t[4],8 + (p11)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n6,8 + (p34)st8 [rptr]=t[3],8 + (p13)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n7,8 + (p34)st8 [rptr]=t[2],8 + (p15)br.cond.dpnt.few .Ldone };; +{ .mmb; (p32)st8 [rptr]=n8,8 + (p34)st8 [rptr]=t[1],8 + nop.b 0 };; +.Ldone: // epilogue +{ .mmi; ldf.fill f16=[r16],64 + ldf.fill f17=[r17],64 + nop.i 0 } +{ .mmi; ldf.fill f18=[r18],64 + ldf.fill f19=[r19],64 + mov pr=prevpr,0x1ffff };; +{ .mmi; ldf.fill f20=[r16] + ldf.fill f21=[r17] + mov ar.lc=prevlc } +{ .mmi; ldf.fill f22=[r18] + ldf.fill f23=[r19] + mov ret0=1 } // signal "handled" +{ .mib; rum 1<<5 + .restore sp + mov sp=prevsp + br.ret.sptk.many b0 };; +.endp bn_mul_mont_8# + +.type copyright#,\@object +copyright: +stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by " +___ + +$output=shift and open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/ia64.S b/app/openssl/crypto/bn/asm/ia64.S index 951abc53..c0cee821 100644 --- a/app/openssl/crypto/bn/asm/ia64.S +++ b/app/openssl/crypto/bn/asm/ia64.S @@ -568,7 +568,7 @@ bn_sqr_comba8: // I've estimated this routine to run in ~120 ticks, but in reality // (i.e. according to ar.itc) it takes ~160 ticks. Are those extra // cycles consumed for instructions fetch? Or did I misinterpret some -// clause in Itanium µ-architecture manual? Comments are welcomed and +// clause in Itanium µ-architecture manual? Comments are welcomed and // highly appreciated. // // On Itanium 2 it takes ~190 ticks. This is because of stalls on diff --git a/app/openssl/crypto/bn/asm/mips-mont.S b/app/openssl/crypto/bn/asm/mips-mont.S new file mode 100644 index 00000000..1b875a2a --- /dev/null +++ b/app/openssl/crypto/bn/asm/mips-mont.S @@ -0,0 +1,284 @@ +.text + +.set noat +.set noreorder + +.align 5 +.globl bn_mul_mont +.ent bn_mul_mont +bn_mul_mont: + lw $8,16($29) + lw $9,20($29) + slt $1,$9,4 + bnez $1,1f + li $2,0 + slt $1,$9,17 # on in-order CPU + bnez $1,bn_mul_mont_internal + nop +1: jr $31 + li $4,0 +.end bn_mul_mont + +.align 5 +.ent bn_mul_mont_internal +bn_mul_mont_internal: + .frame $30,14*4,$31 + .mask 0x40000000|16711680,-4 + sub $29,14*4 + sw $30,(14-1)*4($29) + sw $23,(14-2)*4($29) + sw $22,(14-3)*4($29) + sw $21,(14-4)*4($29) + sw $20,(14-5)*4($29) + sw $19,(14-6)*4($29) + sw $18,(14-7)*4($29) + sw $17,(14-8)*4($29) + sw $16,(14-9)*4($29) + move $30,$29 + + .set reorder + lw $8,0($8) + lw $13,0($6) # bp[0] + lw $12,0($5) # ap[0] + lw $14,0($7) # np[0] + + sub $29,2*4 # place for two extra words + sll $9,2 + li $1,-4096 + sub $29,$9 + and $29,$1 + + multu $12,$13 + lw $16,4($5) + lw $18,4($7) + mflo $10 + mfhi $11 + multu $10,$8 + mflo $23 + + multu $16,$13 + mflo $16 + mfhi $17 + + multu $14,$23 + mflo $24 + mfhi $25 + multu $18,$23 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + mflo $18 + mfhi $19 + + move $15,$29 + li $22,2*4 +.align 4 +.L1st: + .set noreorder + add $12,$5,$22 + add $14,$7,$22 + lw $12,($12) + lw $14,($14) + + multu $12,$13 + addu $10,$16,$11 + addu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + addu $11,$17,$1 + addu $25,$19,$2 + mflo $16 + mfhi $17 + + addu $24,$10 + sltu $1,$24,$10 + multu $14,$23 + addu $25,$1 + addu $22,4 + sw $24,($15) + sltu $2,$22,$9 + mflo $18 + mfhi $19 + + bnez $2,.L1st + add $15,4 + .set reorder + + addu $10,$16,$11 + sltu $1,$10,$11 + addu $11,$17,$1 + + addu $24,$18,$25 + sltu $2,$24,$25 + addu $25,$19,$2 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + + sw $24,($15) + + addu $25,$11 + sltu $1,$25,$11 + sw $25,4($15) + sw $1,2*4($15) + + li $21,4 +.align 4 +.Louter: + add $13,$6,$21 + lw $13,($13) + lw $12,($5) + lw $16,4($5) + lw $20,($29) + + multu $12,$13 + lw $14,($7) + lw $18,4($7) + mflo $10 + mfhi $11 + addu $10,$20 + multu $10,$8 + sltu $1,$10,$20 + addu $11,$1 + mflo $23 + + multu $16,$13 + mflo $16 + mfhi $17 + + multu $14,$23 + mflo $24 + mfhi $25 + + multu $18,$23 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + mflo $18 + mfhi $19 + + move $15,$29 + li $22,2*4 + lw $20,4($15) +.align 4 +.Linner: + .set noreorder + add $12,$5,$22 + add $14,$7,$22 + lw $12,($12) + lw $14,($14) + + multu $12,$13 + addu $10,$16,$11 + addu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + addu $11,$17,$1 + addu $25,$19,$2 + mflo $16 + mfhi $17 + + addu $10,$20 + addu $22,4 + multu $14,$23 + sltu $1,$10,$20 + addu $24,$10 + addu $11,$1 + sltu $2,$24,$10 + lw $20,2*4($15) + addu $25,$2 + sltu $1,$22,$9 + mflo $18 + mfhi $19 + sw $24,($15) + bnez $1,.Linner + add $15,4 + .set reorder + + addu $10,$16,$11 + sltu $1,$10,$11 + addu $11,$17,$1 + addu $10,$20 + sltu $2,$10,$20 + addu $11,$2 + + lw $20,2*4($15) + addu $24,$18,$25 + sltu $1,$24,$25 + addu $25,$19,$1 + addu $24,$10 + sltu $2,$24,$10 + addu $25,$2 + sw $24,($15) + + addu $24,$25,$11 + sltu $25,$24,$11 + addu $24,$20 + sltu $1,$24,$20 + addu $25,$1 + sw $24,4($15) + sw $25,2*4($15) + + addu $21,4 + sltu $2,$21,$9 + bnez $2,.Louter + + .set noreorder + add $20,$29,$9 # &tp[num] + move $15,$29 + move $5,$29 + li $11,0 # clear borrow bit + +.align 4 +.Lsub: lw $10,($15) + lw $24,($7) + add $15,4 + add $7,4 + subu $24,$10,$24 # tp[i]-np[i] + sgtu $1,$24,$10 + subu $10,$24,$11 + sgtu $11,$10,$24 + sw $10,($4) + or $11,$1 + sltu $1,$15,$20 + bnez $1,.Lsub + add $4,4 + + subu $11,$25,$11 # handle upmost overflow bit + move $15,$29 + sub $4,$9 # restore rp + not $25,$11 + + and $5,$11,$29 + and $6,$25,$4 + or $5,$5,$6 # ap=borrow?tp:rp + +.align 4 +.Lcopy: lw $12,($5) + add $5,4 + sw $0,($15) + add $15,4 + sltu $1,$15,$20 + sw $12,($4) + bnez $1,.Lcopy + add $4,4 + + li $4,1 + li $2,1 + + .set noreorder + move $29,$30 + lw $30,(14-1)*4($29) + lw $23,(14-2)*4($29) + lw $22,(14-3)*4($29) + lw $21,(14-4)*4($29) + lw $20,(14-5)*4($29) + lw $19,(14-6)*4($29) + lw $18,(14-7)*4($29) + lw $17,(14-8)*4($29) + lw $16,(14-9)*4($29) + jr $31 + add $29,14*4 +.end bn_mul_mont_internal +.rdata +.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by " diff --git a/app/openssl/crypto/bn/asm/mips-mont.pl b/app/openssl/crypto/bn/asm/mips-mont.pl new file mode 100644 index 00000000..caae04ed --- /dev/null +++ b/app/openssl/crypto/bn/asm/mips-mont.pl @@ -0,0 +1,426 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# This module doesn't present direct interest for OpenSSL, because it +# doesn't provide better performance for longer keys, at least not on +# in-order-execution cores. While 512-bit RSA sign operations can be +# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and +# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from +# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA +# verify:-( All comparisons are against bn_mul_mont-free assembler. +# The module might be of interest to embedded system developers, as +# the code is smaller than 1KB, yet offers >3x improvement on MIPS64 +# and 75-30% [less for longer keys] on MIPS32 over compiler-generated +# code. + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $SZREG=4; +} +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000; +# +# +# +###################################################################### + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +if ($flavour =~ /64|n32/i) { + $LD="ld"; + $ST="sd"; + $MULTU="dmultu"; + $ADDU="daddu"; + $SUBU="dsubu"; + $BNSZ=8; +} else { + $LD="lw"; + $ST="sw"; + $MULTU="multu"; + $ADDU="addu"; + $SUBU="subu"; + $BNSZ=4; +} + +# int bn_mul_mont( +$rp=$a0; # BN_ULONG *rp, +$ap=$a1; # const BN_ULONG *ap, +$bp=$a2; # const BN_ULONG *bp, +$np=$a3; # const BN_ULONG *np, +$n0=$a4; # const BN_ULONG *n0, +$num=$a5; # int num); + +$lo0=$a6; +$hi0=$a7; +$lo1=$t1; +$hi1=$t2; +$aj=$s0; +$bi=$s1; +$nj=$s2; +$tp=$s3; +$alo=$s4; +$ahi=$s5; +$nlo=$s6; +$nhi=$s7; +$tj=$s8; +$i=$s9; +$j=$s10; +$m1=$s11; + +$FRAMESIZE=14; + +$code=<<___; +.text + +.set noat +.set noreorder + +.align 5 +.globl bn_mul_mont +.ent bn_mul_mont +bn_mul_mont: +___ +$code.=<<___ if ($flavour =~ /o32/i); + lw $n0,16($sp) + lw $num,20($sp) +___ +$code.=<<___; + slt $at,$num,4 + bnez $at,1f + li $t0,0 + slt $at,$num,17 # on in-order CPU + bnez $at,bn_mul_mont_internal + nop +1: jr $ra + li $a0,0 +.end bn_mul_mont + +.align 5 +.ent bn_mul_mont_internal +bn_mul_mont_internal: + .frame $fp,$FRAMESIZE*$SZREG,$ra + .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG + $PTR_SUB $sp,$FRAMESIZE*$SZREG + $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp) + $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp) + $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp) + $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp) + $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp) + $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp) + $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp) + $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp) + $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp) + $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp) + $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp) + $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp) +___ +$code.=<<___; + move $fp,$sp + + .set reorder + $LD $n0,0($n0) + $LD $bi,0($bp) # bp[0] + $LD $aj,0($ap) # ap[0] + $LD $nj,0($np) # np[0] + + $PTR_SUB $sp,2*$BNSZ # place for two extra words + sll $num,`log($BNSZ)/log(2)` + li $at,-4096 + $PTR_SUB $sp,$num + and $sp,$at + + $MULTU $aj,$bi + $LD $alo,$BNSZ($ap) + $LD $nlo,$BNSZ($np) + mflo $lo0 + mfhi $hi0 + $MULTU $lo0,$n0 + mflo $m1 + + $MULTU $alo,$bi + mflo $alo + mfhi $ahi + + $MULTU $nj,$m1 + mflo $lo1 + mfhi $hi1 + $MULTU $nlo,$m1 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + mflo $nlo + mfhi $nhi + + move $tp,$sp + li $j,2*$BNSZ +.align 4 +.L1st: + .set noreorder + $PTR_ADD $aj,$ap,$j + $PTR_ADD $nj,$np,$j + $LD $aj,($aj) + $LD $nj,($nj) + + $MULTU $aj,$bi + $ADDU $lo0,$alo,$hi0 + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo0,$hi0 + sltu $t0,$lo1,$hi1 + $ADDU $hi0,$ahi,$at + $ADDU $hi1,$nhi,$t0 + mflo $alo + mfhi $ahi + + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $MULTU $nj,$m1 + $ADDU $hi1,$at + addu $j,$BNSZ + $ST $lo1,($tp) + sltu $t0,$j,$num + mflo $nlo + mfhi $nhi + + bnez $t0,.L1st + $PTR_ADD $tp,$BNSZ + .set reorder + + $ADDU $lo0,$alo,$hi0 + sltu $at,$lo0,$hi0 + $ADDU $hi0,$ahi,$at + + $ADDU $lo1,$nlo,$hi1 + sltu $t0,$lo1,$hi1 + $ADDU $hi1,$nhi,$t0 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + + $ST $lo1,($tp) + + $ADDU $hi1,$hi0 + sltu $at,$hi1,$hi0 + $ST $hi1,$BNSZ($tp) + $ST $at,2*$BNSZ($tp) + + li $i,$BNSZ +.align 4 +.Louter: + $PTR_ADD $bi,$bp,$i + $LD $bi,($bi) + $LD $aj,($ap) + $LD $alo,$BNSZ($ap) + $LD $tj,($sp) + + $MULTU $aj,$bi + $LD $nj,($np) + $LD $nlo,$BNSZ($np) + mflo $lo0 + mfhi $hi0 + $ADDU $lo0,$tj + $MULTU $lo0,$n0 + sltu $at,$lo0,$tj + $ADDU $hi0,$at + mflo $m1 + + $MULTU $alo,$bi + mflo $alo + mfhi $ahi + + $MULTU $nj,$m1 + mflo $lo1 + mfhi $hi1 + + $MULTU $nlo,$m1 + $ADDU $lo1,$lo0 + sltu $at,$lo1,$lo0 + $ADDU $hi1,$at + mflo $nlo + mfhi $nhi + + move $tp,$sp + li $j,2*$BNSZ + $LD $tj,$BNSZ($tp) +.align 4 +.Linner: + .set noreorder + $PTR_ADD $aj,$ap,$j + $PTR_ADD $nj,$np,$j + $LD $aj,($aj) + $LD $nj,($nj) + + $MULTU $aj,$bi + $ADDU $lo0,$alo,$hi0 + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo0,$hi0 + sltu $t0,$lo1,$hi1 + $ADDU $hi0,$ahi,$at + $ADDU $hi1,$nhi,$t0 + mflo $alo + mfhi $ahi + + $ADDU $lo0,$tj + addu $j,$BNSZ + $MULTU $nj,$m1 + sltu $at,$lo0,$tj + $ADDU $lo1,$lo0 + $ADDU $hi0,$at + sltu $t0,$lo1,$lo0 + $LD $tj,2*$BNSZ($tp) + $ADDU $hi1,$t0 + sltu $at,$j,$num + mflo $nlo + mfhi $nhi + $ST $lo1,($tp) + bnez $at,.Linner + $PTR_ADD $tp,$BNSZ + .set reorder + + $ADDU $lo0,$alo,$hi0 + sltu $at,$lo0,$hi0 + $ADDU $hi0,$ahi,$at + $ADDU $lo0,$tj + sltu $t0,$lo0,$tj + $ADDU $hi0,$t0 + + $LD $tj,2*$BNSZ($tp) + $ADDU $lo1,$nlo,$hi1 + sltu $at,$lo1,$hi1 + $ADDU $hi1,$nhi,$at + $ADDU $lo1,$lo0 + sltu $t0,$lo1,$lo0 + $ADDU $hi1,$t0 + $ST $lo1,($tp) + + $ADDU $lo1,$hi1,$hi0 + sltu $hi1,$lo1,$hi0 + $ADDU $lo1,$tj + sltu $at,$lo1,$tj + $ADDU $hi1,$at + $ST $lo1,$BNSZ($tp) + $ST $hi1,2*$BNSZ($tp) + + addu $i,$BNSZ + sltu $t0,$i,$num + bnez $t0,.Louter + + .set noreorder + $PTR_ADD $tj,$sp,$num # &tp[num] + move $tp,$sp + move $ap,$sp + li $hi0,0 # clear borrow bit + +.align 4 +.Lsub: $LD $lo0,($tp) + $LD $lo1,($np) + $PTR_ADD $tp,$BNSZ + $PTR_ADD $np,$BNSZ + $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i] + sgtu $at,$lo1,$lo0 + $SUBU $lo0,$lo1,$hi0 + sgtu $hi0,$lo0,$lo1 + $ST $lo0,($rp) + or $hi0,$at + sltu $at,$tp,$tj + bnez $at,.Lsub + $PTR_ADD $rp,$BNSZ + + $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit + move $tp,$sp + $PTR_SUB $rp,$num # restore rp + not $hi1,$hi0 + + and $ap,$hi0,$sp + and $bp,$hi1,$rp + or $ap,$ap,$bp # ap=borrow?tp:rp + +.align 4 +.Lcopy: $LD $aj,($ap) + $PTR_ADD $ap,$BNSZ + $ST $zero,($tp) + $PTR_ADD $tp,$BNSZ + sltu $at,$tp,$tj + $ST $aj,($rp) + bnez $at,.Lcopy + $PTR_ADD $rp,$BNSZ + + li $a0,1 + li $t0,1 + + .set noreorder + move $sp,$fp + $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp) + $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp) + $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp) + $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp) + $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp) + $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp) + $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp) + $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp) + $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp) + $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp) + $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp) + $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE*$SZREG +.end bn_mul_mont_internal +.rdata +.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by " +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; + +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/mips.pl b/app/openssl/crypto/bn/asm/mips.pl new file mode 100644 index 00000000..d2f3ef7b --- /dev/null +++ b/app/openssl/crypto/bn/asm/mips.pl @@ -0,0 +1,2583 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. +# +# Rights for redistribution and usage in source and binary forms are +# granted according to the OpenSSL license. Warranty of any kind is +# disclaimed. +# ==================================================================== + + +# July 1999 +# +# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. +# +# The module is designed to work with either of the "new" MIPS ABI(5), +# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under +# IRIX 5.x not only because it doesn't support new ABIs but also +# because 5.x kernels put R4x00 CPU into 32-bit mode and all those +# 64-bit instructions (daddu, dmultu, etc.) found below gonna only +# cause illegal instruction exception:-( +# +# In addition the code depends on preprocessor flags set up by MIPSpro +# compiler driver (either as or cc) and therefore (probably?) can't be +# compiled by the GNU assembler. GNU C driver manages fine though... +# I mean as long as -mmips-as is specified or is the default option, +# because then it simply invokes /usr/bin/as which in turn takes +# perfect care of the preprocessor definitions. Another neat feature +# offered by the MIPSpro assembler is an optimization pass. This gave +# me the opportunity to have the code looking more regular as all those +# architecture dependent instruction rescheduling details were left to +# the assembler. Cool, huh? +# +# Performance improvement is astonishing! 'apps/openssl speed rsa dsa' +# goes way over 3 times faster! +# +# + +# October 2010 +# +# Adapt the module even for 32-bit ABIs and other OSes. The former was +# achieved by mechanical replacement of 64-bit arithmetic instructions +# such as dmultu, daddu, etc. with their 32-bit counterparts and +# adjusting offsets denoting multiples of BN_ULONG. Above mentioned +# >3x performance improvement naturally does not apply to 32-bit code +# [because there is no instruction 32-bit compiler can't use], one +# has to content with 40-85% improvement depending on benchmark and +# key length, more for longer keys. + +$flavour = shift; +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +if ($flavour =~ /64|n32/i) { + $LD="ld"; + $ST="sd"; + $MULTU="dmultu"; + $DIVU="ddivu"; + $ADDU="daddu"; + $SUBU="dsubu"; + $SRL="dsrl"; + $SLL="dsll"; + $BNSZ=8; + $PTR_ADD="daddu"; + $PTR_SUB="dsubu"; + $SZREG=8; + $REG_S="sd"; + $REG_L="ld"; +} else { + $LD="lw"; + $ST="sw"; + $MULTU="multu"; + $DIVU="divu"; + $ADDU="addu"; + $SUBU="subu"; + $SRL="srl"; + $SLL="sll"; + $BNSZ=4; + $PTR_ADD="addu"; + $PTR_SUB="subu"; + $SZREG=4; + $REG_S="sw"; + $REG_L="lw"; + $code=".set mips2\n"; +} + +# Below is N32/64 register layout used in the original module. +# +($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7); +# +# No special adaptation is required for O32. NUBI on the other hand +# is treated by saving/restoring ($v1,$t0..$t3). + +$gp=$v1 if ($flavour =~ /nubi/i); + +$minus4=$v1; + +$code.=<<___; +.rdata +.asciiz "mips3.s, Version 1.2" +.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov " + +.text +.set noat + +.align 5 +.globl bn_mul_add_words +.ent bn_mul_add_words +bn_mul_add_words: + .set noreorder + bgtz $a2,bn_mul_add_words_internal + move $v0,$zero + jr $ra + move $a0,$v0 +.end bn_mul_add_words + +.align 5 +.ent bn_mul_add_words_internal +bn_mul_add_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + li $minus4,-4 + and $ta0,$a2,$minus4 + beqz $ta0,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + $LD $t0,0($a1) + $MULTU $t0,$a3 + $LD $t1,0($a0) + $LD $t2,$BNSZ($a1) + $LD $t3,$BNSZ($a0) + $LD $ta0,2*$BNSZ($a1) + $LD $ta1,2*$BNSZ($a0) + $ADDU $t1,$v0 + sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit + # values", but it seems to work fine + # even on 64-bit registers. + mflo $at + mfhi $t0 + $ADDU $t1,$at + $ADDU $v0,$t0 + $MULTU $t2,$a3 + sltu $at,$t1,$at + $ST $t1,0($a0) + $ADDU $v0,$at + + $LD $ta2,3*$BNSZ($a1) + $LD $ta3,3*$BNSZ($a0) + $ADDU $t3,$v0 + sltu $v0,$t3,$v0 + mflo $at + mfhi $t2 + $ADDU $t3,$at + $ADDU $v0,$t2 + $MULTU $ta0,$a3 + sltu $at,$t3,$at + $ST $t3,$BNSZ($a0) + $ADDU $v0,$at + + subu $a2,4 + $PTR_ADD $a0,4*$BNSZ + $PTR_ADD $a1,4*$BNSZ + $ADDU $ta1,$v0 + sltu $v0,$ta1,$v0 + mflo $at + mfhi $ta0 + $ADDU $ta1,$at + $ADDU $v0,$ta0 + $MULTU $ta2,$a3 + sltu $at,$ta1,$at + $ST $ta1,-2*$BNSZ($a0) + $ADDU $v0,$at + + + and $ta0,$a2,$minus4 + $ADDU $ta3,$v0 + sltu $v0,$ta3,$v0 + mflo $at + mfhi $ta2 + $ADDU $ta3,$at + $ADDU $v0,$ta2 + sltu $at,$ta3,$at + $ST $ta3,-$BNSZ($a0) + .set noreorder + bgtz $ta0,.L_bn_mul_add_words_loop + $ADDU $v0,$at + + beqz $a2,.L_bn_mul_add_words_return + nop + +.L_bn_mul_add_words_tail: + .set reorder + $LD $t0,0($a1) + $MULTU $t0,$a3 + $LD $t1,0($a0) + subu $a2,1 + $ADDU $t1,$v0 + sltu $v0,$t1,$v0 + mflo $at + mfhi $t0 + $ADDU $t1,$at + $ADDU $v0,$t0 + sltu $at,$t1,$at + $ST $t1,0($a0) + $ADDU $v0,$at + beqz $a2,.L_bn_mul_add_words_return + + $LD $t0,$BNSZ($a1) + $MULTU $t0,$a3 + $LD $t1,$BNSZ($a0) + subu $a2,1 + $ADDU $t1,$v0 + sltu $v0,$t1,$v0 + mflo $at + mfhi $t0 + $ADDU $t1,$at + $ADDU $v0,$t0 + sltu $at,$t1,$at + $ST $t1,$BNSZ($a0) + $ADDU $v0,$at + beqz $a2,.L_bn_mul_add_words_return + + $LD $t0,2*$BNSZ($a1) + $MULTU $t0,$a3 + $LD $t1,2*$BNSZ($a0) + $ADDU $t1,$v0 + sltu $v0,$t1,$v0 + mflo $at + mfhi $t0 + $ADDU $t1,$at + $ADDU $v0,$t0 + sltu $at,$t1,$at + $ST $t1,2*$BNSZ($a0) + $ADDU $v0,$at + +.L_bn_mul_add_words_return: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 +.end bn_mul_add_words_internal + +.align 5 +.globl bn_mul_words +.ent bn_mul_words +bn_mul_words: + .set noreorder + bgtz $a2,bn_mul_words_internal + move $v0,$zero + jr $ra + move $a0,$v0 +.end bn_mul_words + +.align 5 +.ent bn_mul_words_internal +bn_mul_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + li $minus4,-4 + and $ta0,$a2,$minus4 + beqz $ta0,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + $LD $t0,0($a1) + $MULTU $t0,$a3 + $LD $t2,$BNSZ($a1) + $LD $ta0,2*$BNSZ($a1) + $LD $ta2,3*$BNSZ($a1) + mflo $at + mfhi $t0 + $ADDU $v0,$at + sltu $t1,$v0,$at + $MULTU $t2,$a3 + $ST $v0,0($a0) + $ADDU $v0,$t1,$t0 + + subu $a2,4 + $PTR_ADD $a0,4*$BNSZ + $PTR_ADD $a1,4*$BNSZ + mflo $at + mfhi $t2 + $ADDU $v0,$at + sltu $t3,$v0,$at + $MULTU $ta0,$a3 + $ST $v0,-3*$BNSZ($a0) + $ADDU $v0,$t3,$t2 + + mflo $at + mfhi $ta0 + $ADDU $v0,$at + sltu $ta1,$v0,$at + $MULTU $ta2,$a3 + $ST $v0,-2*$BNSZ($a0) + $ADDU $v0,$ta1,$ta0 + + and $ta0,$a2,$minus4 + mflo $at + mfhi $ta2 + $ADDU $v0,$at + sltu $ta3,$v0,$at + $ST $v0,-$BNSZ($a0) + .set noreorder + bgtz $ta0,.L_bn_mul_words_loop + $ADDU $v0,$ta3,$ta2 + + beqz $a2,.L_bn_mul_words_return + nop + +.L_bn_mul_words_tail: + .set reorder + $LD $t0,0($a1) + $MULTU $t0,$a3 + subu $a2,1 + mflo $at + mfhi $t0 + $ADDU $v0,$at + sltu $t1,$v0,$at + $ST $v0,0($a0) + $ADDU $v0,$t1,$t0 + beqz $a2,.L_bn_mul_words_return + + $LD $t0,$BNSZ($a1) + $MULTU $t0,$a3 + subu $a2,1 + mflo $at + mfhi $t0 + $ADDU $v0,$at + sltu $t1,$v0,$at + $ST $v0,$BNSZ($a0) + $ADDU $v0,$t1,$t0 + beqz $a2,.L_bn_mul_words_return + + $LD $t0,2*$BNSZ($a1) + $MULTU $t0,$a3 + mflo $at + mfhi $t0 + $ADDU $v0,$at + sltu $t1,$v0,$at + $ST $v0,2*$BNSZ($a0) + $ADDU $v0,$t1,$t0 + +.L_bn_mul_words_return: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 +.end bn_mul_words_internal + +.align 5 +.globl bn_sqr_words +.ent bn_sqr_words +bn_sqr_words: + .set noreorder + bgtz $a2,bn_sqr_words_internal + move $v0,$zero + jr $ra + move $a0,$v0 +.end bn_sqr_words + +.align 5 +.ent bn_sqr_words_internal +bn_sqr_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + li $minus4,-4 + and $ta0,$a2,$minus4 + beqz $ta0,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + $LD $t0,0($a1) + $MULTU $t0,$t0 + $LD $t2,$BNSZ($a1) + $LD $ta0,2*$BNSZ($a1) + $LD $ta2,3*$BNSZ($a1) + mflo $t1 + mfhi $t0 + $ST $t1,0($a0) + $ST $t0,$BNSZ($a0) + + $MULTU $t2,$t2 + subu $a2,4 + $PTR_ADD $a0,8*$BNSZ + $PTR_ADD $a1,4*$BNSZ + mflo $t3 + mfhi $t2 + $ST $t3,-6*$BNSZ($a0) + $ST $t2,-5*$BNSZ($a0) + + $MULTU $ta0,$ta0 + mflo $ta1 + mfhi $ta0 + $ST $ta1,-4*$BNSZ($a0) + $ST $ta0,-3*$BNSZ($a0) + + + $MULTU $ta2,$ta2 + and $ta0,$a2,$minus4 + mflo $ta3 + mfhi $ta2 + $ST $ta3,-2*$BNSZ($a0) + + .set noreorder + bgtz $ta0,.L_bn_sqr_words_loop + $ST $ta2,-$BNSZ($a0) + + beqz $a2,.L_bn_sqr_words_return + nop + +.L_bn_sqr_words_tail: + .set reorder + $LD $t0,0($a1) + $MULTU $t0,$t0 + subu $a2,1 + mflo $t1 + mfhi $t0 + $ST $t1,0($a0) + $ST $t0,$BNSZ($a0) + beqz $a2,.L_bn_sqr_words_return + + $LD $t0,$BNSZ($a1) + $MULTU $t0,$t0 + subu $a2,1 + mflo $t1 + mfhi $t0 + $ST $t1,2*$BNSZ($a0) + $ST $t0,3*$BNSZ($a0) + beqz $a2,.L_bn_sqr_words_return + + $LD $t0,2*$BNSZ($a1) + $MULTU $t0,$t0 + mflo $t1 + mfhi $t0 + $ST $t1,4*$BNSZ($a0) + $ST $t0,5*$BNSZ($a0) + +.L_bn_sqr_words_return: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 + +.end bn_sqr_words_internal + +.align 5 +.globl bn_add_words +.ent bn_add_words +bn_add_words: + .set noreorder + bgtz $a3,bn_add_words_internal + move $v0,$zero + jr $ra + move $a0,$v0 +.end bn_add_words + +.align 5 +.ent bn_add_words_internal +bn_add_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + li $minus4,-4 + and $at,$a3,$minus4 + beqz $at,.L_bn_add_words_tail + +.L_bn_add_words_loop: + $LD $t0,0($a1) + $LD $ta0,0($a2) + subu $a3,4 + $LD $t1,$BNSZ($a1) + and $at,$a3,$minus4 + $LD $t2,2*$BNSZ($a1) + $PTR_ADD $a2,4*$BNSZ + $LD $t3,3*$BNSZ($a1) + $PTR_ADD $a0,4*$BNSZ + $LD $ta1,-3*$BNSZ($a2) + $PTR_ADD $a1,4*$BNSZ + $LD $ta2,-2*$BNSZ($a2) + $LD $ta3,-$BNSZ($a2) + $ADDU $ta0,$t0 + sltu $t8,$ta0,$t0 + $ADDU $t0,$ta0,$v0 + sltu $v0,$t0,$ta0 + $ST $t0,-4*$BNSZ($a0) + $ADDU $v0,$t8 + + $ADDU $ta1,$t1 + sltu $t9,$ta1,$t1 + $ADDU $t1,$ta1,$v0 + sltu $v0,$t1,$ta1 + $ST $t1,-3*$BNSZ($a0) + $ADDU $v0,$t9 + + $ADDU $ta2,$t2 + sltu $t8,$ta2,$t2 + $ADDU $t2,$ta2,$v0 + sltu $v0,$t2,$ta2 + $ST $t2,-2*$BNSZ($a0) + $ADDU $v0,$t8 + + $ADDU $ta3,$t3 + sltu $t9,$ta3,$t3 + $ADDU $t3,$ta3,$v0 + sltu $v0,$t3,$ta3 + $ST $t3,-$BNSZ($a0) + + .set noreorder + bgtz $at,.L_bn_add_words_loop + $ADDU $v0,$t9 + + beqz $a3,.L_bn_add_words_return + nop + +.L_bn_add_words_tail: + .set reorder + $LD $t0,0($a1) + $LD $ta0,0($a2) + $ADDU $ta0,$t0 + subu $a3,1 + sltu $t8,$ta0,$t0 + $ADDU $t0,$ta0,$v0 + sltu $v0,$t0,$ta0 + $ST $t0,0($a0) + $ADDU $v0,$t8 + beqz $a3,.L_bn_add_words_return + + $LD $t1,$BNSZ($a1) + $LD $ta1,$BNSZ($a2) + $ADDU $ta1,$t1 + subu $a3,1 + sltu $t9,$ta1,$t1 + $ADDU $t1,$ta1,$v0 + sltu $v0,$t1,$ta1 + $ST $t1,$BNSZ($a0) + $ADDU $v0,$t9 + beqz $a3,.L_bn_add_words_return + + $LD $t2,2*$BNSZ($a1) + $LD $ta2,2*$BNSZ($a2) + $ADDU $ta2,$t2 + sltu $t8,$ta2,$t2 + $ADDU $t2,$ta2,$v0 + sltu $v0,$t2,$ta2 + $ST $t2,2*$BNSZ($a0) + $ADDU $v0,$t8 + +.L_bn_add_words_return: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 + +.end bn_add_words_internal + +.align 5 +.globl bn_sub_words +.ent bn_sub_words +bn_sub_words: + .set noreorder + bgtz $a3,bn_sub_words_internal + move $v0,$zero + jr $ra + move $a0,$zero +.end bn_sub_words + +.align 5 +.ent bn_sub_words_internal +bn_sub_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + li $minus4,-4 + and $at,$a3,$minus4 + beqz $at,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + $LD $t0,0($a1) + $LD $ta0,0($a2) + subu $a3,4 + $LD $t1,$BNSZ($a1) + and $at,$a3,$minus4 + $LD $t2,2*$BNSZ($a1) + $PTR_ADD $a2,4*$BNSZ + $LD $t3,3*$BNSZ($a1) + $PTR_ADD $a0,4*$BNSZ + $LD $ta1,-3*$BNSZ($a2) + $PTR_ADD $a1,4*$BNSZ + $LD $ta2,-2*$BNSZ($a2) + $LD $ta3,-$BNSZ($a2) + sltu $t8,$t0,$ta0 + $SUBU $ta0,$t0,$ta0 + $SUBU $t0,$ta0,$v0 + sgtu $v0,$t0,$ta0 + $ST $t0,-4*$BNSZ($a0) + $ADDU $v0,$t8 + + sltu $t9,$t1,$ta1 + $SUBU $ta1,$t1,$ta1 + $SUBU $t1,$ta1,$v0 + sgtu $v0,$t1,$ta1 + $ST $t1,-3*$BNSZ($a0) + $ADDU $v0,$t9 + + + sltu $t8,$t2,$ta2 + $SUBU $ta2,$t2,$ta2 + $SUBU $t2,$ta2,$v0 + sgtu $v0,$t2,$ta2 + $ST $t2,-2*$BNSZ($a0) + $ADDU $v0,$t8 + + sltu $t9,$t3,$ta3 + $SUBU $ta3,$t3,$ta3 + $SUBU $t3,$ta3,$v0 + sgtu $v0,$t3,$ta3 + $ST $t3,-$BNSZ($a0) + + .set noreorder + bgtz $at,.L_bn_sub_words_loop + $ADDU $v0,$t9 + + beqz $a3,.L_bn_sub_words_return + nop + +.L_bn_sub_words_tail: + .set reorder + $LD $t0,0($a1) + $LD $ta0,0($a2) + subu $a3,1 + sltu $t8,$t0,$ta0 + $SUBU $ta0,$t0,$ta0 + $SUBU $t0,$ta0,$v0 + sgtu $v0,$t0,$ta0 + $ST $t0,0($a0) + $ADDU $v0,$t8 + beqz $a3,.L_bn_sub_words_return + + $LD $t1,$BNSZ($a1) + subu $a3,1 + $LD $ta1,$BNSZ($a2) + sltu $t9,$t1,$ta1 + $SUBU $ta1,$t1,$ta1 + $SUBU $t1,$ta1,$v0 + sgtu $v0,$t1,$ta1 + $ST $t1,$BNSZ($a0) + $ADDU $v0,$t9 + beqz $a3,.L_bn_sub_words_return + + $LD $t2,2*$BNSZ($a1) + $LD $ta2,2*$BNSZ($a2) + sltu $t8,$t2,$ta2 + $SUBU $ta2,$t2,$ta2 + $SUBU $t2,$ta2,$v0 + sgtu $v0,$t2,$ta2 + $ST $t2,2*$BNSZ($a0) + $ADDU $v0,$t8 + +.L_bn_sub_words_return: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 +.end bn_sub_words_internal + +.align 5 +.globl bn_div_3_words +.ent bn_div_3_words +bn_div_3_words: + .set noreorder + move $a3,$a0 # we know that bn_div_words does not + # touch $a3, $ta2, $ta3 and preserves $a2 + # so that we can save two arguments + # and return address in registers + # instead of stack:-) + + $LD $a0,($a3) + move $ta2,$a1 + bne $a0,$a2,bn_div_3_words_internal + $LD $a1,-$BNSZ($a3) + li $v0,-1 + jr $ra + move $a0,$v0 +.end bn_div_3_words + +.align 5 +.ent bn_div_3_words_internal +bn_div_3_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + move $ta3,$ra + bal bn_div_words_internal + move $ra,$ta3 + $MULTU $ta2,$v0 + $LD $t2,-2*$BNSZ($a3) + move $ta0,$zero + mfhi $t1 + mflo $t0 + sltu $t8,$t1,$a1 +.L_bn_div_3_words_inner_loop: + bnez $t8,.L_bn_div_3_words_inner_loop_done + sgeu $at,$t2,$t0 + seq $t9,$t1,$a1 + and $at,$t9 + sltu $t3,$t0,$ta2 + $ADDU $a1,$a2 + $SUBU $t1,$t3 + $SUBU $t0,$ta2 + sltu $t8,$t1,$a1 + sltu $ta0,$a1,$a2 + or $t8,$ta0 + .set noreorder + beqz $at,.L_bn_div_3_words_inner_loop + $SUBU $v0,1 + $ADDU $v0,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 +.end bn_div_3_words_internal + +.align 5 +.globl bn_div_words +.ent bn_div_words +bn_div_words: + .set noreorder + bnez $a2,bn_div_words_internal + li $v0,-1 # I would rather signal div-by-zero + # which can be done with 'break 7' + jr $ra + move $a0,$v0 +.end bn_div_words + +.align 5 +.ent bn_div_words_internal +bn_div_words_internal: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + move $v1,$zero + bltz $a2,.L_bn_div_words_body + move $t9,$v1 + $SLL $a2,1 + bgtz $a2,.-4 + addu $t9,1 + + .set reorder + negu $t1,$t9 + li $t2,-1 + $SLL $t2,$t1 + and $t2,$a0 + $SRL $at,$a1,$t1 + .set noreorder + beqz $t2,.+12 + nop + break 6 # signal overflow + .set reorder + $SLL $a0,$t9 + $SLL $a1,$t9 + or $a0,$at +___ +$QT=$ta0; +$HH=$ta1; +$DH=$v1; +$code.=<<___; +.L_bn_div_words_body: + $SRL $DH,$a2,4*$BNSZ # bits + sgeu $at,$a0,$a2 + .set noreorder + beqz $at,.+12 + nop + $SUBU $a0,$a2 + .set reorder + + li $QT,-1 + $SRL $HH,$a0,4*$BNSZ # bits + $SRL $QT,4*$BNSZ # q=0xffffffff + beq $DH,$HH,.L_bn_div_words_skip_div1 + $DIVU $zero,$a0,$DH + mflo $QT +.L_bn_div_words_skip_div1: + $MULTU $a2,$QT + $SLL $t3,$a0,4*$BNSZ # bits + $SRL $at,$a1,4*$BNSZ # bits + or $t3,$at + mflo $t0 + mfhi $t1 +.L_bn_div_words_inner_loop1: + sltu $t2,$t3,$t0 + seq $t8,$HH,$t1 + sltu $at,$HH,$t1 + and $t2,$t8 + sltu $v0,$t0,$a2 + or $at,$t2 + .set noreorder + beqz $at,.L_bn_div_words_inner_loop1_done + $SUBU $t1,$v0 + $SUBU $t0,$a2 + b .L_bn_div_words_inner_loop1 + $SUBU $QT,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + $SLL $a1,4*$BNSZ # bits + $SUBU $a0,$t3,$t0 + $SLL $v0,$QT,4*$BNSZ # bits + + li $QT,-1 + $SRL $HH,$a0,4*$BNSZ # bits + $SRL $QT,4*$BNSZ # q=0xffffffff + beq $DH,$HH,.L_bn_div_words_skip_div2 + $DIVU $zero,$a0,$DH + mflo $QT +.L_bn_div_words_skip_div2: + $MULTU $a2,$QT + $SLL $t3,$a0,4*$BNSZ # bits + $SRL $at,$a1,4*$BNSZ # bits + or $t3,$at + mflo $t0 + mfhi $t1 +.L_bn_div_words_inner_loop2: + sltu $t2,$t3,$t0 + seq $t8,$HH,$t1 + sltu $at,$HH,$t1 + and $t2,$t8 + sltu $v1,$t0,$a2 + or $at,$t2 + .set noreorder + beqz $at,.L_bn_div_words_inner_loop2_done + $SUBU $t1,$v1 + $SUBU $t0,$a2 + b .L_bn_div_words_inner_loop2 + $SUBU $QT,1 + .set reorder +.L_bn_div_words_inner_loop2_done: + + $SUBU $a0,$t3,$t0 + or $v0,$QT + $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it + $SRL $a2,$t9 # restore $a2 + + .set noreorder + move $a1,$v1 +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + move $a0,$v0 +.end bn_div_words_internal +___ +undef $HH; undef $QT; undef $DH; + +($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3); +($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3); + +($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1 +($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2 + +($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3); + +$code.=<<___; + +.align 5 +.globl bn_mul_comba8 +.ent bn_mul_comba8 +bn_mul_comba8: + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,12*$SZREG,$ra + .mask 0x803ff008,-$SZREG + $PTR_SUB $sp,12*$SZREG + $REG_S $ra,11*$SZREG($sp) + $REG_S $s5,10*$SZREG($sp) + $REG_S $s4,9*$SZREG($sp) + $REG_S $s3,8*$SZREG($sp) + $REG_S $s2,7*$SZREG($sp) + $REG_S $s1,6*$SZREG($sp) + $REG_S $s0,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x003f0000,-$SZREG + $PTR_SUB $sp,6*$SZREG + $REG_S $s5,5*$SZREG($sp) + $REG_S $s4,4*$SZREG($sp) + $REG_S $s3,3*$SZREG($sp) + $REG_S $s2,2*$SZREG($sp) + $REG_S $s1,1*$SZREG($sp) + $REG_S $s0,0*$SZREG($sp) +___ +$code.=<<___; + + .set reorder + $LD $a_0,0($a1) # If compiled with -mips3 option on + # R5000 box assembler barks on this + # 1ine with "should not have mult/div + # as last instruction in bb (R10K + # bug)" warning. If anybody out there + # has a clue about how to circumvent + # this do send me a note. + # + + $LD $b_0,0($a2) + $LD $a_1,$BNSZ($a1) + $LD $a_2,2*$BNSZ($a1) + $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $LD $a_3,3*$BNSZ($a1) + $LD $b_1,$BNSZ($a2) + $LD $b_2,2*$BNSZ($a2) + $LD $b_3,3*$BNSZ($a2) + mflo $c_1 + mfhi $c_2 + + $LD $a_4,4*$BNSZ($a1) + $LD $a_5,5*$BNSZ($a1) + $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + $LD $a_6,6*$BNSZ($a1) + $LD $a_7,7*$BNSZ($a1) + $LD $b_4,4*$BNSZ($a2) + $LD $b_5,5*$BNSZ($a2) + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $ADDU $c_3,$t_2,$at + $LD $b_6,6*$BNSZ($a2) + $LD $b_7,7*$BNSZ($a2) + $ST $c_1,0($a0) # r[0]=c1; + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + $ST $c_2,$BNSZ($a0) # r[1]=c2; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) # r[2]=c3; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $c_3,$c_2,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,3*$BNSZ($a0) # r[3]=c1; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) # r[4]=c2; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,5*$BNSZ($a0) # r[5]=c3; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $c_3,$c_2,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,6*$BNSZ($a0) # r[6]=c1; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,7*$BNSZ($a0) # r[7]=c2; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,8*$BNSZ($a0) # r[8]=c3; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $c_3,$c_2,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,9*$BNSZ($a0) # r[9]=c1; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,10*$BNSZ($a0) # r[10]=c2; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,11*$BNSZ($a0) # r[11]=c3; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $c_3,$c_2,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,12*$BNSZ($a0) # r[12]=c1; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,13*$BNSZ($a0) # r[13]=c2; + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + $ST $c_3,14*$BNSZ($a0) # r[14]=c3; + $ST $c_1,15*$BNSZ($a0) # r[15]=c1; + + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s5,10*$SZREG($sp) + $REG_L $s4,9*$SZREG($sp) + $REG_L $s3,8*$SZREG($sp) + $REG_L $s2,7*$SZREG($sp) + $REG_L $s1,6*$SZREG($sp) + $REG_L $s0,5*$SZREG($sp) + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + jr $ra + $PTR_ADD $sp,12*$SZREG +___ +$code.=<<___ if ($flavour !~ /nubi/i); + $REG_L $s5,5*$SZREG($sp) + $REG_L $s4,4*$SZREG($sp) + $REG_L $s3,3*$SZREG($sp) + $REG_L $s2,2*$SZREG($sp) + $REG_L $s1,1*$SZREG($sp) + $REG_L $s0,0*$SZREG($sp) + jr $ra + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; +.end bn_mul_comba8 + +.align 5 +.globl bn_mul_comba4 +.ent bn_mul_comba4 +bn_mul_comba4: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + $LD $a_0,0($a1) + $LD $b_0,0($a2) + $LD $a_1,$BNSZ($a1) + $LD $a_2,2*$BNSZ($a1) + $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $LD $a_3,3*$BNSZ($a1) + $LD $b_1,$BNSZ($a2) + $LD $b_2,2*$BNSZ($a2) + $LD $b_3,3*$BNSZ($a2) + mflo $c_1 + mfhi $c_2 + $ST $c_1,0($a0) + + $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $ADDU $c_3,$t_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + $ST $c_2,$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $c_3,$c_2,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,3*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $c_1,$c_3,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $c_2,$c_1,$t_2 + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,5*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + $ST $c_1,6*$BNSZ($a0) + $ST $c_2,7*$BNSZ($a0) + + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + nop +.end bn_mul_comba4 +___ + +($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); + +$code.=<<___; + +.align 5 +.globl bn_sqr_comba8 +.ent bn_sqr_comba8 +bn_sqr_comba8: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + $LD $a_0,0($a1) + $LD $a_1,$BNSZ($a1) + $LD $a_2,2*$BNSZ($a1) + $LD $a_3,3*$BNSZ($a1) + + $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $LD $a_4,4*$BNSZ($a1) + $LD $a_5,5*$BNSZ($a1) + $LD $a_6,6*$BNSZ($a1) + $LD $a_7,7*$BNSZ($a1) + mflo $c_1 + mfhi $c_2 + $ST $c_1,0($a0) + + $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $c_3,$t_2,$at + $ST $c_2,$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_3,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,3*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_1,$at + $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_2,$at + $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); + $ADDU $c_2,$at + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,5*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_3,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,6*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_1,$at + $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_1,$at + $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_1,$at + $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,7*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_2,$at + $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_2,$at + $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,8*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_3,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,9*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_1,$at + $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,10*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_2,$at + $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,11*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_3,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,12*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,13*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + $ST $c_3,14*$BNSZ($a0) + $ST $c_1,15*$BNSZ($a0) + + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + nop +.end bn_sqr_comba8 + +.align 5 +.globl bn_sqr_comba4 +.ent bn_sqr_comba4 +bn_sqr_comba4: +___ +$code.=<<___ if ($flavour =~ /nubi/i); + .frame $sp,6*$SZREG,$ra + .mask 0x8000f008,-$SZREG + .set noreorder + $PTR_SUB $sp,6*$SZREG + $REG_S $ra,5*$SZREG($sp) + $REG_S $t3,4*$SZREG($sp) + $REG_S $t2,3*$SZREG($sp) + $REG_S $t1,2*$SZREG($sp) + $REG_S $t0,1*$SZREG($sp) + $REG_S $gp,0*$SZREG($sp) +___ +$code.=<<___; + .set reorder + $LD $a_0,0($a1) + $LD $a_1,$BNSZ($a1) + $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $LD $a_2,2*$BNSZ($a1) + $LD $a_3,3*$BNSZ($a1) + mflo $c_1 + mfhi $c_2 + $ST $c_1,0($a0) + + $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $c_3,$t_2,$at + $ST $c_2,$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_3,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + mflo $t_1 + mfhi $t_2 + slt $at,$t_2,$zero + $ADDU $c_3,$at + $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); + $SLL $t_2,1 + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,3*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_1,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 + sltu $at,$c_2,$t_1 + $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); + $ADDU $t_2,$at + $ADDU $c_3,$t_2 + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + slt $c_2,$t_2,$zero + $SLL $t_2,1 + $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); + slt $a2,$t_1,$zero + $ADDU $t_2,$a2 + $SLL $t_1,1 + $ADDU $c_3,$t_1 + sltu $at,$c_3,$t_1 + $ADDU $t_2,$at + $ADDU $c_1,$t_2 + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,5*$BNSZ($a0) + + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 + sltu $at,$c_1,$t_1 + $ADDU $t_2,$at + $ADDU $c_2,$t_2 + $ST $c_1,6*$BNSZ($a0) + $ST $c_2,7*$BNSZ($a0) + + .set noreorder +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $t3,4*$SZREG($sp) + $REG_L $t2,3*$SZREG($sp) + $REG_L $t1,2*$SZREG($sp) + $REG_L $t0,1*$SZREG($sp) + $REG_L $gp,0*$SZREG($sp) + $PTR_ADD $sp,6*$SZREG +___ +$code.=<<___; + jr $ra + nop +.end bn_sqr_comba4 +___ +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/modexp512-x86_64.S b/app/openssl/crypto/bn/asm/modexp512-x86_64.S new file mode 100644 index 00000000..6cccafb8 --- /dev/null +++ b/app/openssl/crypto/bn/asm/modexp512-x86_64.S @@ -0,0 +1,1773 @@ +.text + +.type MULADD_128x512,@function +.align 16 +MULADD_128x512: + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 +.size MULADD_128x512,.-MULADD_128x512 +.type mont_reduce,@function +.align 16 +mont_reduce: + leaq 192(%rsp),%rdi + movq 32(%rsp),%rsi + addq $576,%rsi + leaq 520(%rsp),%rcx + + movq 96(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + movq (%rcx),%r8 + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + movq 8(%rcx),%r9 + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + movq 16(%rcx),%r10 + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + movq 24(%rcx),%r11 + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + movq 32(%rcx),%r12 + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + movq 40(%rcx),%r13 + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + movq 48(%rcx),%r14 + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + movq 56(%rcx),%r15 + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 104(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 112(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 120(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + xorq %rax,%rax + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + adcq 80(%rcx),%r10 + adcq 88(%rcx),%r11 + adcq $0,%rax + + + + + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,%rbp + movq %r11,88(%rdi) + + movq %rax,384(%rsp) + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + + + + + + + + addq $80,%rdi + + addq $64,%rsi + leaq 296(%rsp),%rcx + + call MULADD_128x512 + + movq 384(%rsp),%rax + + + addq -16(%rdi),%r8 + adcq -8(%rdi),%r9 + movq %r8,64(%rcx) + movq %r9,72(%rcx) + + adcq %rax,%rax + movq %rax,384(%rsp) + + leaq 192(%rsp),%rdi + addq $64,%rsi + + + + + + movq (%rsi),%r8 + movq 8(%rsi),%rbx + + movq (%rcx),%rax + mulq %r8 + movq %rax,%rbp + movq %rdx,%r9 + + movq 8(%rcx),%rax + mulq %r8 + addq %rax,%r9 + + movq (%rcx),%rax + mulq %rbx + addq %rax,%r9 + + movq %r9,8(%rdi) + + + subq $192,%rsi + + movq (%rcx),%r8 + movq 8(%rcx),%r9 + + call MULADD_128x512 + + + + + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdi + movq 24(%rsi),%rdx + + + movq 384(%rsp),%rbp + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + + + adcq %rbp,%rbp + + + + shlq $3,%rbp + movq 32(%rsp),%rcx + addq %rcx,%rbp + + + xorq %rsi,%rsi + + addq 0(%rbp),%r10 + adcq 64(%rbp),%r11 + adcq 128(%rbp),%r12 + adcq 192(%rbp),%r13 + adcq 256(%rbp),%r14 + adcq 320(%rbp),%r15 + adcq 384(%rbp),%r8 + adcq 448(%rbp),%r9 + + + + sbbq $0,%rsi + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + movq $1,%rbp + subq %rax,%r10 + sbbq %rbx,%r11 + sbbq %rdi,%r12 + sbbq %rdx,%r13 + + + + + sbbq $0,%rbp + + + + addq $512,%rcx + movq 32(%rcx),%rax + movq 40(%rcx),%rbx + movq 48(%rcx),%rdi + movq 56(%rcx),%rdx + + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + + + subq $1,%rbp + + sbbq %rax,%r14 + sbbq %rbx,%r15 + sbbq %rdi,%r8 + sbbq %rdx,%r9 + + + + movq 144(%rsp),%rsi + movq %r10,0(%rsi) + movq %r11,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq %r8,48(%rsi) + movq %r9,56(%rsi) + + .byte 0xf3,0xc3 +.size mont_reduce,.-mont_reduce +.type mont_mul_a3b,@function +.align 16 +mont_mul_a3b: + + + + + movq 0(%rdi),%rbp + + movq %r10,%rax + mulq %rbp + movq %rax,520(%rsp) + movq %rdx,%r10 + movq %r11,%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r12,%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r13,%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq %r14,%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r15,%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r8,%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,528(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 16(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,536(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq 24(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %r12,544(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq 32(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %r13,552(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq 40(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %r14,560(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq 48(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %r15,568(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq 56(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,576(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,584(%rsp) + movq %r10,592(%rsp) + movq %r11,600(%rsp) + movq %r12,608(%rsp) + movq %r13,616(%rsp) + movq %r14,624(%rsp) + movq %r15,632(%rsp) + movq %r8,640(%rsp) + + + + + + jmp mont_reduce + + +.size mont_mul_a3b,.-mont_mul_a3b +.type sqr_reduce,@function +.align 16 +sqr_reduce: + movq 16(%rsp),%rcx + + + + movq %r10,%rbx + + movq %r11,%rax + mulq %rbx + movq %rax,528(%rsp) + movq %rdx,%r10 + movq %r12,%rax + mulq %rbx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r13,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r14,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq %r15,%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r8,%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%rsi + + movq %r10,536(%rsp) + + + + + + movq 8(%rcx),%rbx + + movq 16(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,544(%rsp) + + movq %rdx,%r10 + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,552(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 16(%rcx),%rbx + + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %r13,560(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %r14,568(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r12 + + + + + + movq 24(%rcx),%rbx + + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,576(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + movq %rsi,584(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + + movq %rdx,%r15 + + + + + movq 32(%rcx),%rbx + + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,592(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,600(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 40(%rcx),%rbx + + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,608(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + movq %r11,616(%rsp) + + movq %rdx,%r12 + + + + + movq %r8,%rbx + + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %r12,624(%rsp) + + movq %rdx,632(%rsp) + + + movq 528(%rsp),%r10 + movq 536(%rsp),%r11 + movq 544(%rsp),%r12 + movq 552(%rsp),%r13 + movq 560(%rsp),%r14 + movq 568(%rsp),%r15 + + movq 24(%rcx),%rax + mulq %rax + movq %rax,%rdi + movq %rdx,%r8 + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r8 + + movq 0(%rcx),%rax + mulq %rax + movq %rax,520(%rsp) + movq %rdx,%rbx + + movq 8(%rcx),%rax + mulq %rax + + addq %rbx,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbx + movq %r10,528(%rsp) + movq %r11,536(%rsp) + + movq 16(%rcx),%rax + mulq %rax + + addq %rbx,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbx + + movq %r12,544(%rsp) + movq %r13,552(%rsp) + + xorq %rbp,%rbp + addq %rbx,%r14 + adcq %rdi,%r15 + adcq $0,%rbp + + movq %r14,560(%rsp) + movq %r15,568(%rsp) + + + + + movq 576(%rsp),%r10 + movq 584(%rsp),%r11 + movq 592(%rsp),%r12 + movq 600(%rsp),%r13 + movq 608(%rsp),%r14 + movq 616(%rsp),%r15 + movq 624(%rsp),%rdi + movq 632(%rsp),%rsi + + movq %r9,%rax + mulq %rax + movq %rax,%r9 + movq %rdx,%rbx + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq %rdi,%rdi + adcq %rsi,%rsi + adcq $0,%rbx + + addq %rbp,%r10 + + movq 32(%rcx),%rax + mulq %rax + + addq %r8,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbp + + movq %r10,576(%rsp) + movq %r11,584(%rsp) + + movq 40(%rcx),%rax + mulq %rax + + addq %rbp,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbp + + movq %r12,592(%rsp) + movq %r13,600(%rsp) + + movq 48(%rcx),%rax + mulq %rax + + addq %rbp,%r14 + adcq %rax,%r15 + adcq $0,%rdx + + movq %r14,608(%rsp) + movq %r15,616(%rsp) + + addq %rdx,%rdi + adcq %r9,%rsi + adcq $0,%rbx + + movq %rdi,624(%rsp) + movq %rsi,632(%rsp) + movq %rbx,640(%rsp) + + jmp mont_reduce + + +.size sqr_reduce,.-sqr_reduce +.globl mod_exp_512 +.type mod_exp_512,@function +mod_exp_512: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r8 + subq $2688,%rsp + andq $-64,%rsp + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rcx,24(%rsp) +.Lbody: + + + + pxor %xmm4,%xmm4 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqa %xmm4,512(%rsp) + movdqa %xmm4,528(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,544(%rsp) + movdqa %xmm1,560(%rsp) + movdqa %xmm2,576(%rsp) + movdqa %xmm3,592(%rsp) + + + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + + leaq 384(%rsp),%rbx + movq %rbx,136(%rsp) + call mont_reduce + + + leaq 448(%rsp),%rcx + xorq %rax,%rax + movq %rax,0(%rcx) + movq %rax,8(%rcx) + movq %rax,24(%rcx) + movq %rax,32(%rcx) + movq %rax,40(%rcx) + movq %rax,48(%rcx) + movq %rax,56(%rcx) + movq %rax,128(%rsp) + movq $1,16(%rcx) + + leaq 640(%rsp),%rbp + movq %rcx,%rsi + movq %rbp,%rdi + movq $8,%rax +loop_0: + movq (%rcx),%rbx + movw %bx,(%rdi) + shrq $16,%rbx + movw %bx,64(%rdi) + shrq $16,%rbx + movw %bx,128(%rdi) + shrq $16,%rbx + movw %bx,192(%rdi) + leaq 8(%rcx),%rcx + leaq 256(%rdi),%rdi + decq %rax + jnz loop_0 + movq $31,%rax + movq %rax,32(%rsp) + movq %rbp,40(%rsp) + + movq %rsi,136(%rsp) + movq 0(%rsi),%r10 + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 +init_loop: + leaq 384(%rsp),%rdi + call mont_mul_a3b + leaq 448(%rsp),%rsi + movq 40(%rsp),%rbp + addq $2,%rbp + movq %rbp,40(%rsp) + movq %rsi,%rcx + movq $8,%rax +loop_1: + movq (%rcx),%rbx + movw %bx,(%rbp) + shrq $16,%rbx + movw %bx,64(%rbp) + shrq $16,%rbx + movw %bx,128(%rbp) + shrq $16,%rbx + movw %bx,192(%rbp) + leaq 8(%rcx),%rcx + leaq 256(%rbp),%rbp + decq %rax + jnz loop_1 + movq 32(%rsp),%rax + subq $1,%rax + movq %rax,32(%rsp) + jne init_loop + + + + movdqa %xmm0,64(%rsp) + movdqa %xmm1,80(%rsp) + movdqa %xmm2,96(%rsp) + movdqa %xmm3,112(%rsp) + + + + + + movl 126(%rsp),%eax + movq %rax,%rdx + shrq $11,%rax + andl $2047,%edx + movl %edx,126(%rsp) + leaq 640(%rsp,%rax,2),%rsi + movq 8(%rsp),%rdx + movq $4,%rbp +loop_2: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_2 + movq $505,48(%rsp) + + movq 8(%rsp),%rcx + movq %rcx,136(%rsp) + movq 0(%rcx),%r10 + movq 8(%rcx),%r11 + movq 16(%rcx),%r12 + movq 24(%rcx),%r13 + movq 32(%rcx),%r14 + movq 40(%rcx),%r15 + movq 48(%rcx),%r8 + movq 56(%rcx),%r9 + jmp sqr_2 + +main_loop_a3b: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2: + call sqr_reduce + call sqr_reduce + + + + movq 48(%rsp),%rcx + movq %rcx,%rax + shrq $4,%rax + movl 64(%rsp,%rax,2),%edx + andq $15,%rcx + shrq %cl,%rdx + andq $31,%rdx + + leaq 640(%rsp,%rdx,2),%rsi + leaq 448(%rsp),%rdx + movq %rdx,%rdi + movq $4,%rbp +loop_3: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_3 + movq 8(%rsp),%rsi + call mont_mul_a3b + + + + movq 48(%rsp),%rcx + subq $5,%rcx + movq %rcx,48(%rsp) + jge main_loop_a3b + + + +end_main_loop_a3b: + + + movq 8(%rsp),%rdx + pxor %xmm4,%xmm4 + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + movdqa %xmm4,576(%rsp) + movdqa %xmm4,592(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,512(%rsp) + movdqa %xmm1,528(%rsp) + movdqa %xmm2,544(%rsp) + movdqa %xmm3,560(%rsp) + call mont_reduce + + + + movq 8(%rsp),%rax + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%r12 + movq 40(%rax),%r13 + movq 48(%rax),%r14 + movq 56(%rax),%r15 + + + movq 24(%rsp),%rbx + addq $512,%rbx + + subq 0(%rbx),%r8 + sbbq 8(%rbx),%r9 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + sbbq 32(%rbx),%r12 + sbbq 40(%rbx),%r13 + sbbq 48(%rbx),%r14 + sbbq 56(%rbx),%r15 + + + movq 0(%rax),%rsi + movq 8(%rax),%rdi + movq 16(%rax),%rcx + movq 24(%rax),%rdx + cmovncq %r8,%rsi + cmovncq %r9,%rdi + cmovncq %r10,%rcx + cmovncq %r11,%rdx + movq %rsi,0(%rax) + movq %rdi,8(%rax) + movq %rcx,16(%rax) + movq %rdx,24(%rax) + + movq 32(%rax),%rsi + movq 40(%rax),%rdi + movq 48(%rax),%rcx + movq 56(%rax),%rdx + cmovncq %r12,%rsi + cmovncq %r13,%rdi + cmovncq %r14,%rcx + cmovncq %r15,%rdx + movq %rsi,32(%rax) + movq %rdi,40(%rax) + movq %rcx,48(%rax) + movq %rdx,56(%rax) + + movq 0(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbx + movq 40(%rsi),%rbp + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size mod_exp_512, . - mod_exp_512 diff --git a/app/openssl/crypto/bn/asm/modexp512-x86_64.pl b/app/openssl/crypto/bn/asm/modexp512-x86_64.pl new file mode 100644 index 00000000..bfd6e975 --- /dev/null +++ b/app/openssl/crypto/bn/asm/modexp512-x86_64.pl @@ -0,0 +1,1497 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2010-2011 Intel Corp. +# Author: Vinodh.Gopal@intel.com +# Jim Guilford +# Erdinc.Ozturk@intel.com +# Maxim.Perminov@intel.com +# +# More information about algorithm used can be found at: +# http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf +# +# ==================================================================== +# Copyright (c) 2011 The OpenSSL Project. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgment: +# "This product includes software developed by the OpenSSL Project +# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" +# +# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to +# endorse or promote products derived from this software without +# prior written permission. For written permission, please contact +# licensing@OpenSSL.org. +# +# 5. Products derived from this software may not be called "OpenSSL" +# nor may "OpenSSL" appear in their names without prior written +# permission of the OpenSSL Project. +# +# 6. Redistributions of any form whatsoever must retain the following +# acknowledgment: +# "This product includes software developed by the OpenSSL Project +# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" +# +# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY +# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR +# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. +# ==================================================================== + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +use strict; +my $code=".text\n\n"; +my $m=0; + +# +# Define x512 macros +# + +#MULSTEP_512_ADD MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2 +# +# uses rax, rdx, and args +sub MULSTEP_512_ADD +{ + my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_; + my @X=@$x; # make a copy +$code.=<<___; + mov (+8*0)($SRC2), %rax + mul $OP # rdx:rax = %OP * [0] + mov ($ASRC), $X[0] + add %rax, $X[0] + adc \$0, %rdx + mov $X[0], $DST +___ +for(my $i=1;$i<8;$i++) { +$code.=<<___; + mov %rdx, $TMP + + mov (+8*$i)($SRC2), %rax + mul $OP # rdx:rax = %OP * [$i] + mov (+8*$i)($ASRC), $X[$i] + add %rax, $X[$i] + adc \$0, %rdx + add $TMP, $X[$i] + adc \$0, %rdx +___ +} +$code.=<<___; + mov %rdx, $X[0] +___ +} + +#MULSTEP_512 MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp +# +# uses rax, rdx, and args +sub MULSTEP_512 +{ + my ($x, $DST, $SRC2, $OP, $TMP)=@_; + my @X=@$x; # make a copy +$code.=<<___; + mov (+8*0)($SRC2), %rax + mul $OP # rdx:rax = %OP * [0] + add %rax, $X[0] + adc \$0, %rdx + mov $X[0], $DST +___ +for(my $i=1;$i<8;$i++) { +$code.=<<___; + mov %rdx, $TMP + + mov (+8*$i)($SRC2), %rax + mul $OP # rdx:rax = %OP * [$i] + add %rax, $X[$i] + adc \$0, %rdx + add $TMP, $X[$i] + adc \$0, %rdx +___ +} +$code.=<<___; + mov %rdx, $X[0] +___ +} + +# +# Swizzle Macros +# + +# macro to copy data from flat space to swizzled table +#MACRO swizzle pDst, pSrc, tmp1, tmp2 +# pDst and pSrc are modified +sub swizzle +{ + my ($pDst, $pSrc, $cnt, $d0)=@_; +$code.=<<___; + mov \$8, $cnt +loop_$m: + mov ($pSrc), $d0 + mov $d0#w, ($pDst) + shr \$16, $d0 + mov $d0#w, (+64*1)($pDst) + shr \$16, $d0 + mov $d0#w, (+64*2)($pDst) + shr \$16, $d0 + mov $d0#w, (+64*3)($pDst) + lea 8($pSrc), $pSrc + lea 64*4($pDst), $pDst + dec $cnt + jnz loop_$m +___ + + $m++; +} + +# macro to copy data from swizzled table to flat space +#MACRO unswizzle pDst, pSrc, tmp*3 +sub unswizzle +{ + my ($pDst, $pSrc, $cnt, $d0, $d1)=@_; +$code.=<<___; + mov \$4, $cnt +loop_$m: + movzxw (+64*3+256*0)($pSrc), $d0 + movzxw (+64*3+256*1)($pSrc), $d1 + shl \$16, $d0 + shl \$16, $d1 + mov (+64*2+256*0)($pSrc), $d0#w + mov (+64*2+256*1)($pSrc), $d1#w + shl \$16, $d0 + shl \$16, $d1 + mov (+64*1+256*0)($pSrc), $d0#w + mov (+64*1+256*1)($pSrc), $d1#w + shl \$16, $d0 + shl \$16, $d1 + mov (+64*0+256*0)($pSrc), $d0#w + mov (+64*0+256*1)($pSrc), $d1#w + mov $d0, (+8*0)($pDst) + mov $d1, (+8*1)($pDst) + lea 256*2($pSrc), $pSrc + lea 8*2($pDst), $pDst + sub \$1, $cnt + jnz loop_$m +___ + + $m++; +} + +# +# Data Structures +# + +# Reduce Data +# +# +# Offset Value +# 0C0 Carries +# 0B8 X2[10] +# 0B0 X2[9] +# 0A8 X2[8] +# 0A0 X2[7] +# 098 X2[6] +# 090 X2[5] +# 088 X2[4] +# 080 X2[3] +# 078 X2[2] +# 070 X2[1] +# 068 X2[0] +# 060 X1[12] P[10] +# 058 X1[11] P[9] Z[8] +# 050 X1[10] P[8] Z[7] +# 048 X1[9] P[7] Z[6] +# 040 X1[8] P[6] Z[5] +# 038 X1[7] P[5] Z[4] +# 030 X1[6] P[4] Z[3] +# 028 X1[5] P[3] Z[2] +# 020 X1[4] P[2] Z[1] +# 018 X1[3] P[1] Z[0] +# 010 X1[2] P[0] Y[2] +# 008 X1[1] Q[1] Y[1] +# 000 X1[0] Q[0] Y[0] + +my $X1_offset = 0; # 13 qwords +my $X2_offset = $X1_offset + 13*8; # 11 qwords +my $Carries_offset = $X2_offset + 11*8; # 1 qword +my $Q_offset = 0; # 2 qwords +my $P_offset = $Q_offset + 2*8; # 11 qwords +my $Y_offset = 0; # 3 qwords +my $Z_offset = $Y_offset + 3*8; # 9 qwords + +my $Red_Data_Size = $Carries_offset + 1*8; # (25 qwords) + +# +# Stack Frame +# +# +# offset value +# ... +# ... +# 280 Garray + +# 278 tmp16[15] +# ... ... +# 200 tmp16[0] + +# 1F8 tmp[7] +# ... ... +# 1C0 tmp[0] + +# 1B8 GT[7] +# ... ... +# 180 GT[0] + +# 178 Reduce Data +# ... ... +# 0B8 Reduce Data +# 0B0 reserved +# 0A8 reserved +# 0A0 reserved +# 098 reserved +# 090 reserved +# 088 reduce result addr +# 080 exp[8] + +# ... +# 048 exp[1] +# 040 exp[0] + +# 038 reserved +# 030 loop_idx +# 028 pg +# 020 i +# 018 pData ; arg 4 +# 010 pG ; arg 2 +# 008 pResult ; arg 1 +# 000 rsp ; stack pointer before subtract + +my $rsp_offset = 0; +my $pResult_offset = 8*1 + $rsp_offset; +my $pG_offset = 8*1 + $pResult_offset; +my $pData_offset = 8*1 + $pG_offset; +my $i_offset = 8*1 + $pData_offset; +my $pg_offset = 8*1 + $i_offset; +my $loop_idx_offset = 8*1 + $pg_offset; +my $reserved1_offset = 8*1 + $loop_idx_offset; +my $exp_offset = 8*1 + $reserved1_offset; +my $red_result_addr_offset= 8*9 + $exp_offset; +my $reserved2_offset = 8*1 + $red_result_addr_offset; +my $Reduce_Data_offset = 8*5 + $reserved2_offset; +my $GT_offset = $Red_Data_Size + $Reduce_Data_offset; +my $tmp_offset = 8*8 + $GT_offset; +my $tmp16_offset = 8*8 + $tmp_offset; +my $garray_offset = 8*16 + $tmp16_offset; +my $mem_size = 8*8*32 + $garray_offset; + +# +# Offsets within Reduce Data +# +# +# struct MODF_2FOLD_MONT_512_C1_DATA { +# UINT64 t[8][8]; +# UINT64 m[8]; +# UINT64 m1[8]; /* 2^768 % m */ +# UINT64 m2[8]; /* 2^640 % m */ +# UINT64 k1[2]; /* (- 1/m) % 2^128 */ +# }; + +my $T = 0; +my $M = 512; # = 8 * 8 * 8 +my $M1 = 576; # = 8 * 8 * 9 /* += 8 * 8 */ +my $M2 = 640; # = 8 * 8 * 10 /* += 8 * 8 */ +my $K1 = 704; # = 8 * 8 * 11 /* += 8 * 8 */ + +# +# FUNCTIONS +# + +{{{ +# +# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords) +# and add 512-bits (8 qwords) +# to get 640 bits (10 qwords) +# Input: 128-bit mul source: [rdi+8*1], rbp +# 512-bit mul source: [rsi+8*n] +# 512-bit add source: r15, r14, ..., r9, r8 +# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0] +# Clobbers all regs except: rcx, rsi, rdi +$code.=<<___; +.type MULADD_128x512,\@abi-omnipotent +.align 16 +MULADD_128x512: +___ + &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx"); +$code.=<<___; + mov (+8*1)(%rdi), %rbp +___ + &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx"); +$code.=<<___; + ret +.size MULADD_128x512,.-MULADD_128x512 +___ +}}} + +{{{ +#MULADD_256x512 MACRO pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0 +# +# Inputs: pDst: Destination (768 bits, 12 qwords) +# pA: Multiplicand (1024 bits, 16 qwords) +# pB: Multiplicand (512 bits, 8 qwords) +# Dst = Ah * B + Al +# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits) +# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0] +# Uses registers: arguments, RAX, RDX +sub MULADD_256x512 +{ + my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_; +$code.=<<___; + mov (+8*12)($pA), $OP +___ + &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP); + push(@$X,shift(@$X)); + +$code.=<<___; + mov (+8*13)($pA), $OP +___ + &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP); + push(@$X,shift(@$X)); + +$code.=<<___; + mov (+8*14)($pA), $OP +___ + &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP); + push(@$X,shift(@$X)); + +$code.=<<___; + mov (+8*15)($pA), $OP +___ + &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP); + push(@$X,shift(@$X)); +} + +# +# mont_reduce(UINT64 *x, /* 1024 bits, 16 qwords */ +# UINT64 *m, /* 512 bits, 8 qwords */ +# MODF_2FOLD_MONT_512_C1_DATA *data, +# UINT64 *r) /* 512 bits, 8 qwords */ +# Input: x (number to be reduced): tmp16 (Implicit) +# m (modulus): [pM] (Implicit) +# data (reduce data): [pData] (Implicit) +# Output: r (result): Address in [red_res_addr] +# result also in: r9, r8, r15, r14, r13, r12, r11, r10 + +my @X=map("%r$_",(8..15)); + +$code.=<<___; +.type mont_reduce,\@abi-omnipotent +.align 16 +mont_reduce: +___ + +my $STACK_DEPTH = 8; + # + # X1 = Xh * M1 + Xl +$code.=<<___; + lea (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi # pX1 (Dst) 769 bits, 13 qwords + mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi # pM1 (Bsrc) 512 bits, 8 qwords + add \$$M1, %rsi + lea (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx # X (Asrc) 1024 bits, 16 qwords + +___ + + &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X); # rotates @X 4 times + # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0] + +$code.=<<___; + xor %rax, %rax + # X1 += xl + add (+8*8)(%rcx), $X[4] + adc (+8*9)(%rcx), $X[5] + adc (+8*10)(%rcx), $X[6] + adc (+8*11)(%rcx), $X[7] + adc \$0, %rax + # X1 is now rax, r11-r8, r15-r12, tmp16[3:0] + + # + # check for carry ;; carry stored in rax + mov $X[4], (+8*8)(%rdi) # rdi points to X1 + mov $X[5], (+8*9)(%rdi) + mov $X[6], %rbp + mov $X[7], (+8*11)(%rdi) + + mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) + + mov (+8*0)(%rdi), $X[4] + mov (+8*1)(%rdi), $X[5] + mov (+8*2)(%rdi), $X[6] + mov (+8*3)(%rdi), $X[7] + + # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8 + # rdi -> X1 + # rsi -> M1 + + # + # X2 = Xh * M2 + Xl + # do first part (X2 = Xh * M2) + add \$8*10, %rdi # rdi -> pXh ; 128 bits, 2 qwords + # Xh is actually { [rdi+8*1], rbp } + add \$`$M2-$M1`, %rsi # rsi -> M2 + lea (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx # rcx -> pX2 ; 641 bits, 11 qwords +___ + unshift(@X,pop(@X)); unshift(@X,pop(@X)); +$code.=<<___; + + call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 + # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] + mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax + + # X2 += Xl + add (+8*8-8*10)(%rdi), $X[6] # (-8*10) is to adjust rdi -> Xh to Xl + adc (+8*9-8*10)(%rdi), $X[7] + mov $X[6], (+8*8)(%rcx) + mov $X[7], (+8*9)(%rcx) + + adc %rax, %rax + mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) + + lea (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi # rdi -> pQ ; 128 bits, 2 qwords + add \$`$K1-$M2`, %rsi # rsi -> pK1 ; 128 bits, 2 qwords + + # MUL_128x128t128 rdi, rcx, rsi ; Q = X2 * K1 (bottom half) + # B1:B0 = rsi[1:0] = K1[1:0] + # A1:A0 = rcx[1:0] = X2[1:0] + # Result = rdi[1],rbp = Q[1],rbp + mov (%rsi), %r8 # B0 + mov (+8*1)(%rsi), %rbx # B1 + + mov (%rcx), %rax # A0 + mul %r8 # B0 + mov %rax, %rbp + mov %rdx, %r9 + + mov (+8*1)(%rcx), %rax # A1 + mul %r8 # B0 + add %rax, %r9 + + mov (%rcx), %rax # A0 + mul %rbx # B1 + add %rax, %r9 + + mov %r9, (+8*1)(%rdi) + # end MUL_128x128t128 + + sub \$`$K1-$M`, %rsi + + mov (%rcx), $X[6] + mov (+8*1)(%rcx), $X[7] # r9:r8 = X2[1:0] + + call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 + # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] + + # load first half of m to rdx, rdi, rbx, rax + # moved this here for efficiency + mov (+8*0)(%rsi), %rax + mov (+8*1)(%rsi), %rbx + mov (+8*2)(%rsi), %rdi + mov (+8*3)(%rsi), %rdx + + # continue with reduction + mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp + + add (+8*8)(%rcx), $X[6] + adc (+8*9)(%rcx), $X[7] + + #accumulate the final carry to rbp + adc %rbp, %rbp + + # Add in overflow corrections: R = (X2>>128) += T[overflow] + # R = {r9, r8, r15, r14, ..., r10} + shl \$3, %rbp + mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rcx # rsi -> Data (and points to T) + add %rcx, %rbp # pT ; 512 bits, 8 qwords, spread out + + # rsi will be used to generate a mask after the addition + xor %rsi, %rsi + + add (+8*8*0)(%rbp), $X[0] + adc (+8*8*1)(%rbp), $X[1] + adc (+8*8*2)(%rbp), $X[2] + adc (+8*8*3)(%rbp), $X[3] + adc (+8*8*4)(%rbp), $X[4] + adc (+8*8*5)(%rbp), $X[5] + adc (+8*8*6)(%rbp), $X[6] + adc (+8*8*7)(%rbp), $X[7] + + # if there is a carry: rsi = 0xFFFFFFFFFFFFFFFF + # if carry is clear: rsi = 0x0000000000000000 + sbb \$0, %rsi + + # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m + and %rsi, %rax + and %rsi, %rbx + and %rsi, %rdi + and %rsi, %rdx + + mov \$1, %rbp + sub %rax, $X[0] + sbb %rbx, $X[1] + sbb %rdi, $X[2] + sbb %rdx, $X[3] + + # if there is a borrow: rbp = 0 + # if there is no borrow: rbp = 1 + # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m + sbb \$0, %rbp + + #load second half of m to rdx, rdi, rbx, rax + + add \$$M, %rcx + mov (+8*4)(%rcx), %rax + mov (+8*5)(%rcx), %rbx + mov (+8*6)(%rcx), %rdi + mov (+8*7)(%rcx), %rdx + + # use the rsi mask as before + # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m + and %rsi, %rax + and %rsi, %rbx + and %rsi, %rdi + and %rsi, %rdx + + # if rbp = 0, there was a borrow before, it is moved to the carry flag + # if rbp = 1, there was not a borrow before, carry flag is cleared + sub \$1, %rbp + + sbb %rax, $X[4] + sbb %rbx, $X[5] + sbb %rdi, $X[6] + sbb %rdx, $X[7] + + # write R back to memory + + mov (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi + mov $X[0], (+8*0)(%rsi) + mov $X[1], (+8*1)(%rsi) + mov $X[2], (+8*2)(%rsi) + mov $X[3], (+8*3)(%rsi) + mov $X[4], (+8*4)(%rsi) + mov $X[5], (+8*5)(%rsi) + mov $X[6], (+8*6)(%rsi) + mov $X[7], (+8*7)(%rsi) + + ret +.size mont_reduce,.-mont_reduce +___ +}}} + +{{{ +#MUL_512x512 MACRO pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2 +# +# Inputs: pDst: Destination (1024 bits, 16 qwords) +# pA: Multiplicand (512 bits, 8 qwords) +# pB: Multiplicand (512 bits, 8 qwords) +# Uses registers rax, rdx, args +# B operand in [pB] and also in x7...x0 +sub MUL_512x512 +{ + my ($pDst, $pA, $pB, $x, $OP, $TMP, $pDst_o)=@_; + my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); + my @X=@$x; # make a copy + +$code.=<<___; + mov (+8*0)($pA), $OP + + mov $X[0], %rax + mul $OP # rdx:rax = %OP * [0] + mov %rax, (+$pDst_o+8*0)($pDst) + mov %rdx, $X[0] +___ +for(my $i=1;$i<8;$i++) { +$code.=<<___; + mov $X[$i], %rax + mul $OP # rdx:rax = %OP * [$i] + add %rax, $X[$i-1] + adc \$0, %rdx + mov %rdx, $X[$i] +___ +} + +for(my $i=1;$i<8;$i++) { +$code.=<<___; + mov (+8*$i)($pA), $OP +___ + + &MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP); + push(@X,shift(@X)); +} + +$code.=<<___; + mov $X[0], (+$pDst_o+8*8)($pDst) + mov $X[1], (+$pDst_o+8*9)($pDst) + mov $X[2], (+$pDst_o+8*10)($pDst) + mov $X[3], (+$pDst_o+8*11)($pDst) + mov $X[4], (+$pDst_o+8*12)($pDst) + mov $X[5], (+$pDst_o+8*13)($pDst) + mov $X[6], (+$pDst_o+8*14)($pDst) + mov $X[7], (+$pDst_o+8*15)($pDst) +___ +} + +# +# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits) +# Input: src1: Address of source 1: rdi +# src2: Address of source 2: rsi +# Output: dst: Address of destination: [red_res_addr] +# src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10 +# Temp: Clobbers [tmp16], all registers +$code.=<<___; +.type mont_mul_a3b,\@abi-omnipotent +.align 16 +mont_mul_a3b: + # + # multiply tmp = src1 * src2 + # For multiply: dst = rcx, src1 = rdi, src2 = rsi + # stack depth is extra 8 from call +___ + &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx"); +$code.=<<___; + # + # Dst = tmp % m + # Call reduce(tmp, m, data, dst) + + # tail recursion optimization: jmp to mont_reduce and return from there + jmp mont_reduce + # call mont_reduce + # ret +.size mont_mul_a3b,.-mont_mul_a3b +___ +}}} + +{{{ +#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4 +# +# Input in memory [pA] and also in x7...x0 +# Uses all argument registers plus rax and rdx +# +# This version computes all of the off-diagonal terms into memory, +# and then it adds in the diagonal terms + +sub SQR_512 +{ + my ($pDst, $pA, $x, $A, $tmp, $x7, $x6, $pDst_o)=@_; + my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); + my @X=@$x; # make a copy +$code.=<<___; + # ------------------ + # first pass 01...07 + # ------------------ + mov $X[0], $A + + mov $X[1],%rax + mul $A + mov %rax, (+$pDst_o+8*1)($pDst) +___ +for(my $i=2;$i<8;$i++) { +$code.=<<___; + mov %rdx, $X[$i-2] + mov $X[$i],%rax + mul $A + add %rax, $X[$i-2] + adc \$0, %rdx +___ +} +$code.=<<___; + mov %rdx, $x7 + + mov $X[0], (+$pDst_o+8*2)($pDst) + + # ------------------ + # second pass 12...17 + # ------------------ + + mov (+8*1)($pA), $A + + mov (+8*2)($pA),%rax + mul $A + add %rax, $X[1] + adc \$0, %rdx + mov $X[1], (+$pDst_o+8*3)($pDst) + + mov %rdx, $X[0] + mov (+8*3)($pA),%rax + mul $A + add %rax, $X[2] + adc \$0, %rdx + add $X[0], $X[2] + adc \$0, %rdx + mov $X[2], (+$pDst_o+8*4)($pDst) + + mov %rdx, $X[0] + mov (+8*4)($pA),%rax + mul $A + add %rax, $X[3] + adc \$0, %rdx + add $X[0], $X[3] + adc \$0, %rdx + + mov %rdx, $X[0] + mov (+8*5)($pA),%rax + mul $A + add %rax, $X[4] + adc \$0, %rdx + add $X[0], $X[4] + adc \$0, %rdx + + mov %rdx, $X[0] + mov $X[6],%rax + mul $A + add %rax, $X[5] + adc \$0, %rdx + add $X[0], $X[5] + adc \$0, %rdx + + mov %rdx, $X[0] + mov $X[7],%rax + mul $A + add %rax, $x7 + adc \$0, %rdx + add $X[0], $x7 + adc \$0, %rdx + + mov %rdx, $X[1] + + # ------------------ + # third pass 23...27 + # ------------------ + mov (+8*2)($pA), $A + + mov (+8*3)($pA),%rax + mul $A + add %rax, $X[3] + adc \$0, %rdx + mov $X[3], (+$pDst_o+8*5)($pDst) + + mov %rdx, $X[0] + mov (+8*4)($pA),%rax + mul $A + add %rax, $X[4] + adc \$0, %rdx + add $X[0], $X[4] + adc \$0, %rdx + mov $X[4], (+$pDst_o+8*6)($pDst) + + mov %rdx, $X[0] + mov (+8*5)($pA),%rax + mul $A + add %rax, $X[5] + adc \$0, %rdx + add $X[0], $X[5] + adc \$0, %rdx + + mov %rdx, $X[0] + mov $X[6],%rax + mul $A + add %rax, $x7 + adc \$0, %rdx + add $X[0], $x7 + adc \$0, %rdx + + mov %rdx, $X[0] + mov $X[7],%rax + mul $A + add %rax, $X[1] + adc \$0, %rdx + add $X[0], $X[1] + adc \$0, %rdx + + mov %rdx, $X[2] + + # ------------------ + # fourth pass 34...37 + # ------------------ + + mov (+8*3)($pA), $A + + mov (+8*4)($pA),%rax + mul $A + add %rax, $X[5] + adc \$0, %rdx + mov $X[5], (+$pDst_o+8*7)($pDst) + + mov %rdx, $X[0] + mov (+8*5)($pA),%rax + mul $A + add %rax, $x7 + adc \$0, %rdx + add $X[0], $x7 + adc \$0, %rdx + mov $x7, (+$pDst_o+8*8)($pDst) + + mov %rdx, $X[0] + mov $X[6],%rax + mul $A + add %rax, $X[1] + adc \$0, %rdx + add $X[0], $X[1] + adc \$0, %rdx + + mov %rdx, $X[0] + mov $X[7],%rax + mul $A + add %rax, $X[2] + adc \$0, %rdx + add $X[0], $X[2] + adc \$0, %rdx + + mov %rdx, $X[5] + + # ------------------ + # fifth pass 45...47 + # ------------------ + mov (+8*4)($pA), $A + + mov (+8*5)($pA),%rax + mul $A + add %rax, $X[1] + adc \$0, %rdx + mov $X[1], (+$pDst_o+8*9)($pDst) + + mov %rdx, $X[0] + mov $X[6],%rax + mul $A + add %rax, $X[2] + adc \$0, %rdx + add $X[0], $X[2] + adc \$0, %rdx + mov $X[2], (+$pDst_o+8*10)($pDst) + + mov %rdx, $X[0] + mov $X[7],%rax + mul $A + add %rax, $X[5] + adc \$0, %rdx + add $X[0], $X[5] + adc \$0, %rdx + + mov %rdx, $X[1] + + # ------------------ + # sixth pass 56...57 + # ------------------ + mov (+8*5)($pA), $A + + mov $X[6],%rax + mul $A + add %rax, $X[5] + adc \$0, %rdx + mov $X[5], (+$pDst_o+8*11)($pDst) + + mov %rdx, $X[0] + mov $X[7],%rax + mul $A + add %rax, $X[1] + adc \$0, %rdx + add $X[0], $X[1] + adc \$0, %rdx + mov $X[1], (+$pDst_o+8*12)($pDst) + + mov %rdx, $X[2] + + # ------------------ + # seventh pass 67 + # ------------------ + mov $X[6], $A + + mov $X[7],%rax + mul $A + add %rax, $X[2] + adc \$0, %rdx + mov $X[2], (+$pDst_o+8*13)($pDst) + + mov %rdx, (+$pDst_o+8*14)($pDst) + + # start finalize (add in squares, and double off-terms) + mov (+$pDst_o+8*1)($pDst), $X[0] + mov (+$pDst_o+8*2)($pDst), $X[1] + mov (+$pDst_o+8*3)($pDst), $X[2] + mov (+$pDst_o+8*4)($pDst), $X[3] + mov (+$pDst_o+8*5)($pDst), $X[4] + mov (+$pDst_o+8*6)($pDst), $X[5] + + mov (+8*3)($pA), %rax + mul %rax + mov %rax, $x6 + mov %rdx, $X[6] + + add $X[0], $X[0] + adc $X[1], $X[1] + adc $X[2], $X[2] + adc $X[3], $X[3] + adc $X[4], $X[4] + adc $X[5], $X[5] + adc \$0, $X[6] + + mov (+8*0)($pA), %rax + mul %rax + mov %rax, (+$pDst_o+8*0)($pDst) + mov %rdx, $A + + mov (+8*1)($pA), %rax + mul %rax + + add $A, $X[0] + adc %rax, $X[1] + adc \$0, %rdx + + mov %rdx, $A + mov $X[0], (+$pDst_o+8*1)($pDst) + mov $X[1], (+$pDst_o+8*2)($pDst) + + mov (+8*2)($pA), %rax + mul %rax + + add $A, $X[2] + adc %rax, $X[3] + adc \$0, %rdx + + mov %rdx, $A + + mov $X[2], (+$pDst_o+8*3)($pDst) + mov $X[3], (+$pDst_o+8*4)($pDst) + + xor $tmp, $tmp + add $A, $X[4] + adc $x6, $X[5] + adc \$0, $tmp + + mov $X[4], (+$pDst_o+8*5)($pDst) + mov $X[5], (+$pDst_o+8*6)($pDst) + + # %%tmp has 0/1 in column 7 + # %%A6 has a full value in column 7 + + mov (+$pDst_o+8*7)($pDst), $X[0] + mov (+$pDst_o+8*8)($pDst), $X[1] + mov (+$pDst_o+8*9)($pDst), $X[2] + mov (+$pDst_o+8*10)($pDst), $X[3] + mov (+$pDst_o+8*11)($pDst), $X[4] + mov (+$pDst_o+8*12)($pDst), $X[5] + mov (+$pDst_o+8*13)($pDst), $x6 + mov (+$pDst_o+8*14)($pDst), $x7 + + mov $X[7], %rax + mul %rax + mov %rax, $X[7] + mov %rdx, $A + + add $X[0], $X[0] + adc $X[1], $X[1] + adc $X[2], $X[2] + adc $X[3], $X[3] + adc $X[4], $X[4] + adc $X[5], $X[5] + adc $x6, $x6 + adc $x7, $x7 + adc \$0, $A + + add $tmp, $X[0] + + mov (+8*4)($pA), %rax + mul %rax + + add $X[6], $X[0] + adc %rax, $X[1] + adc \$0, %rdx + + mov %rdx, $tmp + + mov $X[0], (+$pDst_o+8*7)($pDst) + mov $X[1], (+$pDst_o+8*8)($pDst) + + mov (+8*5)($pA), %rax + mul %rax + + add $tmp, $X[2] + adc %rax, $X[3] + adc \$0, %rdx + + mov %rdx, $tmp + + mov $X[2], (+$pDst_o+8*9)($pDst) + mov $X[3], (+$pDst_o+8*10)($pDst) + + mov (+8*6)($pA), %rax + mul %rax + + add $tmp, $X[4] + adc %rax, $X[5] + adc \$0, %rdx + + mov $X[4], (+$pDst_o+8*11)($pDst) + mov $X[5], (+$pDst_o+8*12)($pDst) + + add %rdx, $x6 + adc $X[7], $x7 + adc \$0, $A + + mov $x6, (+$pDst_o+8*13)($pDst) + mov $x7, (+$pDst_o+8*14)($pDst) + mov $A, (+$pDst_o+8*15)($pDst) +___ +} + +# +# sqr_reduce: subroutine to compute Result = reduce(Result * Result) +# +# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10 +# +$code.=<<___; +.type sqr_reduce,\@abi-omnipotent +.align 16 +sqr_reduce: + mov (+$pResult_offset+8)(%rsp), %rcx +___ + &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi"); +$code.=<<___; + # tail recursion optimization: jmp to mont_reduce and return from there + jmp mont_reduce + # call mont_reduce + # ret +.size sqr_reduce,.-sqr_reduce +___ +}}} + +# +# MAIN FUNCTION +# + +#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */ +# UINT64 *g, /* 512 bits, 8 qwords */ +# UINT64 *exp, /* 512 bits, 8 qwords */ +# struct mod_ctx_512 *data) + +# window size = 5 +# table size = 2^5 = 32 +#table_entries equ 32 +#table_size equ table_entries * 8 +$code.=<<___; +.globl mod_exp_512 +.type mod_exp_512,\@function,4 +mod_exp_512: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + # adjust stack down and then align it with cache boundary + mov %rsp, %r8 + sub \$$mem_size, %rsp + and \$-64, %rsp + + # store previous stack pointer and arguments + mov %r8, (+$rsp_offset)(%rsp) + mov %rdi, (+$pResult_offset)(%rsp) + mov %rsi, (+$pG_offset)(%rsp) + mov %rcx, (+$pData_offset)(%rsp) +.Lbody: + # transform g into montgomery space + # GT = reduce(g * C2) = reduce(g * (2^256)) + # reduce expects to have the input in [tmp16] + pxor %xmm4, %xmm4 + movdqu (+16*0)(%rsi), %xmm0 + movdqu (+16*1)(%rsi), %xmm1 + movdqu (+16*2)(%rsi), %xmm2 + movdqu (+16*3)(%rsi), %xmm3 + movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) + movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp) + movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp) + movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp) + movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp) + + # load pExp before rdx gets blown away + movdqu (+16*0)(%rdx), %xmm0 + movdqu (+16*1)(%rdx), %xmm1 + movdqu (+16*2)(%rdx), %xmm2 + movdqu (+16*3)(%rdx), %xmm3 + + lea (+$GT_offset)(%rsp), %rbx + mov %rbx, (+$red_result_addr_offset)(%rsp) + call mont_reduce + + # Initialize tmp = C + lea (+$tmp_offset)(%rsp), %rcx + xor %rax, %rax + mov %rax, (+8*0)(%rcx) + mov %rax, (+8*1)(%rcx) + mov %rax, (+8*3)(%rcx) + mov %rax, (+8*4)(%rcx) + mov %rax, (+8*5)(%rcx) + mov %rax, (+8*6)(%rcx) + mov %rax, (+8*7)(%rcx) + mov %rax, (+$exp_offset+8*8)(%rsp) + movq \$1, (+8*2)(%rcx) + + lea (+$garray_offset)(%rsp), %rbp + mov %rcx, %rsi # pTmp + mov %rbp, %rdi # Garray[][0] +___ + + &swizzle("%rdi", "%rcx", "%rax", "%rbx"); + + # for (rax = 31; rax != 0; rax--) { + # tmp = reduce(tmp * G) + # swizzle(pg, tmp); + # pg += 2; } +$code.=<<___; + mov \$31, %rax + mov %rax, (+$i_offset)(%rsp) + mov %rbp, (+$pg_offset)(%rsp) + # rsi -> pTmp + mov %rsi, (+$red_result_addr_offset)(%rsp) + mov (+8*0)(%rsi), %r10 + mov (+8*1)(%rsi), %r11 + mov (+8*2)(%rsi), %r12 + mov (+8*3)(%rsi), %r13 + mov (+8*4)(%rsi), %r14 + mov (+8*5)(%rsi), %r15 + mov (+8*6)(%rsi), %r8 + mov (+8*7)(%rsi), %r9 +init_loop: + lea (+$GT_offset)(%rsp), %rdi + call mont_mul_a3b + lea (+$tmp_offset)(%rsp), %rsi + mov (+$pg_offset)(%rsp), %rbp + add \$2, %rbp + mov %rbp, (+$pg_offset)(%rsp) + mov %rsi, %rcx # rcx = rsi = addr of tmp +___ + + &swizzle("%rbp", "%rcx", "%rax", "%rbx"); +$code.=<<___; + mov (+$i_offset)(%rsp), %rax + sub \$1, %rax + mov %rax, (+$i_offset)(%rsp) + jne init_loop + + # + # Copy exponent onto stack + movdqa %xmm0, (+$exp_offset+16*0)(%rsp) + movdqa %xmm1, (+$exp_offset+16*1)(%rsp) + movdqa %xmm2, (+$exp_offset+16*2)(%rsp) + movdqa %xmm3, (+$exp_offset+16*3)(%rsp) + + + # + # Do exponentiation + # Initialize result to G[exp{511:507}] + mov (+$exp_offset+62)(%rsp), %eax + mov %rax, %rdx + shr \$11, %rax + and \$0x07FF, %edx + mov %edx, (+$exp_offset+62)(%rsp) + lea (+$garray_offset)(%rsp,%rax,2), %rsi + mov (+$pResult_offset)(%rsp), %rdx +___ + + &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); + + # + # Loop variables + # rcx = [loop_idx] = index: 510-5 to 0 by 5 +$code.=<<___; + movq \$505, (+$loop_idx_offset)(%rsp) + + mov (+$pResult_offset)(%rsp), %rcx + mov %rcx, (+$red_result_addr_offset)(%rsp) + mov (+8*0)(%rcx), %r10 + mov (+8*1)(%rcx), %r11 + mov (+8*2)(%rcx), %r12 + mov (+8*3)(%rcx), %r13 + mov (+8*4)(%rcx), %r14 + mov (+8*5)(%rcx), %r15 + mov (+8*6)(%rcx), %r8 + mov (+8*7)(%rcx), %r9 + jmp sqr_2 + +main_loop_a3b: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2: + call sqr_reduce + call sqr_reduce + + # + # Do multiply, first look up proper value in Garray + mov (+$loop_idx_offset)(%rsp), %rcx # bit index + mov %rcx, %rax + shr \$4, %rax # rax is word pointer + mov (+$exp_offset)(%rsp,%rax,2), %edx + and \$15, %rcx + shrq %cl, %rdx + and \$0x1F, %rdx + + lea (+$garray_offset)(%rsp,%rdx,2), %rsi + lea (+$tmp_offset)(%rsp), %rdx + mov %rdx, %rdi +___ + + &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); + # rdi = tmp = pG + + # + # Call mod_mul_a1(pDst, pSrc1, pSrc2, pM, pData) + # result result pG M Data +$code.=<<___; + mov (+$pResult_offset)(%rsp), %rsi + call mont_mul_a3b + + # + # finish loop + mov (+$loop_idx_offset)(%rsp), %rcx + sub \$5, %rcx + mov %rcx, (+$loop_idx_offset)(%rsp) + jge main_loop_a3b + + # + +end_main_loop_a3b: + # transform result out of Montgomery space + # result = reduce(result) + mov (+$pResult_offset)(%rsp), %rdx + pxor %xmm4, %xmm4 + movdqu (+16*0)(%rdx), %xmm0 + movdqu (+16*1)(%rdx), %xmm1 + movdqu (+16*2)(%rdx), %xmm2 + movdqu (+16*3)(%rdx), %xmm3 + movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) + movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) + movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp) + movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp) + movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp) + movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp) + call mont_reduce + + # If result > m, subract m + # load result into r15:r8 + mov (+$pResult_offset)(%rsp), %rax + mov (+8*0)(%rax), %r8 + mov (+8*1)(%rax), %r9 + mov (+8*2)(%rax), %r10 + mov (+8*3)(%rax), %r11 + mov (+8*4)(%rax), %r12 + mov (+8*5)(%rax), %r13 + mov (+8*6)(%rax), %r14 + mov (+8*7)(%rax), %r15 + + # subtract m + mov (+$pData_offset)(%rsp), %rbx + add \$$M, %rbx + + sub (+8*0)(%rbx), %r8 + sbb (+8*1)(%rbx), %r9 + sbb (+8*2)(%rbx), %r10 + sbb (+8*3)(%rbx), %r11 + sbb (+8*4)(%rbx), %r12 + sbb (+8*5)(%rbx), %r13 + sbb (+8*6)(%rbx), %r14 + sbb (+8*7)(%rbx), %r15 + + # if Carry is clear, replace result with difference + mov (+8*0)(%rax), %rsi + mov (+8*1)(%rax), %rdi + mov (+8*2)(%rax), %rcx + mov (+8*3)(%rax), %rdx + cmovnc %r8, %rsi + cmovnc %r9, %rdi + cmovnc %r10, %rcx + cmovnc %r11, %rdx + mov %rsi, (+8*0)(%rax) + mov %rdi, (+8*1)(%rax) + mov %rcx, (+8*2)(%rax) + mov %rdx, (+8*3)(%rax) + + mov (+8*4)(%rax), %rsi + mov (+8*5)(%rax), %rdi + mov (+8*6)(%rax), %rcx + mov (+8*7)(%rax), %rdx + cmovnc %r12, %rsi + cmovnc %r13, %rdi + cmovnc %r14, %rcx + cmovnc %r15, %rdx + mov %rsi, (+8*4)(%rax) + mov %rdi, (+8*5)(%rax) + mov %rcx, (+8*6)(%rax) + mov %rdx, (+8*7)(%rax) + + mov (+$rsp_offset)(%rsp), %rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbx + mov 40(%rsi),%rbp + lea 48(%rsi),%rsp +.Lepilogue: + ret +.size mod_exp_512, . - mod_exp_512 +___ + +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +my $rec="%rcx"; +my $frame="%rdx"; +my $context="%r8"; +my $disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type mod_exp_512_se_handler,\@abi-omnipotent +.align 16 +mod_exp_512_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lbody(%rip),%r10 + cmp %r10,%rbx # context->RipRsp + + lea .Lepilogue(%rip),%r10 + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + mov $rsp_offset(%rax),%rax # pull saved Rsp + + mov 32(%rax),%rbx + mov 40(%rax),%rbp + mov 24(%rax),%r12 + mov 16(%rax),%r13 + mov 8(%rax),%r14 + mov 0(%rax),%r15 + lea 48(%rax),%rax + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size mod_exp_512_se_handler,.-mod_exp_512_se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_mod_exp_512 + .rva .LSEH_end_mod_exp_512 + .rva .LSEH_info_mod_exp_512 + +.section .xdata +.align 8 +.LSEH_info_mod_exp_512: + .byte 9,0,0,0 + .rva mod_exp_512_se_handler +___ +} + +sub reg_part { +my ($reg,$conv)=@_; + if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } + elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } + elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } + elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } + return $reg; +} + +$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/(\(\+[^)]+\))/eval $1/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/parisc-mont.pl b/app/openssl/crypto/bn/asm/parisc-mont.pl new file mode 100644 index 00000000..c02ef6f0 --- /dev/null +++ b/app/openssl/crypto/bn/asm/parisc-mont.pl @@ -0,0 +1,995 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# On PA-7100LC this module performs ~90-50% better, less for longer +# keys, than code generated by gcc 3.2 for PA-RISC 1.1. Latter means +# that compiler utilized xmpyu instruction to perform 32x32=64-bit +# multiplication, which in turn means that "baseline" performance was +# optimal in respect to instruction set capabilities. Fair comparison +# with vendor compiler is problematic, because OpenSSL doesn't define +# BN_LLONG [presumably] for historical reasons, which drives compiler +# toward 4 times 16x16=32-bit multiplicatons [plus complementary +# shifts and additions] instead. This means that you should observe +# several times improvement over code generated by vendor compiler +# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual +# improvement coefficient was never collected on PA-7100LC, or any +# other 1.1 CPU, because I don't have access to such machine with +# vendor compiler. But to give you a taste, PA-RISC 1.1 code path +# reportedly outperformed code generated by cc +DA1.1 +O3 by factor +# of ~5x on PA-8600. +# +# On PA-RISC 2.0 it has to compete with pa-risc2[W].s, which is +# reportedly ~2x faster than vendor compiler generated code [according +# to comment in pa-risc2[W].s]. Here comes a catch. Execution core of +# this implementation is actually 32-bit one, in the sense that it +# operates on 32-bit values. But pa-risc2[W].s operates on arrays of +# 64-bit BN_LONGs... How do they interoperate then? No problem. This +# module picks halves of 64-bit values in reverse order and pretends +# they were 32-bit BN_LONGs. But can 32-bit core compete with "pure" +# 64-bit code such as pa-risc2[W].s then? Well, the thing is that +# 32x32=64-bit multiplication is the best even PA-RISC 2.0 can do, +# i.e. there is no "wider" multiplication like on most other 64-bit +# platforms. This means that even being effectively 32-bit, this +# implementation performs "64-bit" computational task in same amount +# of arithmetic operations, most notably multiplications. It requires +# more memory references, most notably to tp[num], but this doesn't +# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC +# 2.0 code path provides virtually same performance as pa-risc2[W].s: +# it's ~10% better for shortest key length and ~10% worse for longest +# one. +# +# In case it wasn't clear. The module has two distinct code paths: +# PA-RISC 1.1 and PA-RISC 2.0 ones. Latter features carry-free 64-bit +# additions and 64-bit integer loads, not to mention specific +# instruction scheduling. In 64-bit build naturally only 2.0 code path +# is assembled. In 32-bit application context both code paths are +# assembled, PA-RISC 2.0 CPU is detected at run-time and proper path +# is taken automatically. Also, in 32-bit build the module imposes +# couple of limitations: vector lengths has to be even and vector +# addresses has to be 64-bit aligned. Normally neither is a problem: +# most common key lengths are even and vectors are commonly malloc-ed, +# which ensures alignment. +# +# Special thanks to polarhome.com for providing HP-UX account on +# PA-RISC 1.1 machine, and to correspondent who chose to remain +# anonymous for testing the code on PA-RISC 2.0 machine. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +$flavour = shift; +$output = shift; + +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; + $BN_SZ =$SIZE_T; +} else { + $LEVEL ="1.1"; #$LEVEL.="\n\t.ALLOW\t2.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; + $BN_SZ =$SIZE_T; + if (open CONF,"<${dir}../../opensslconf.h") { + while() { + if (m/#\s*define\s+SIXTY_FOUR_BIT/) { + $BN_SZ=8; + $LEVEL="2.0"; + last; + } + } + close CONF; + } +} + +$FRAME=8*$SIZE_T+$FRAME_MARKER; # 8 saved regs + frame marker + # [+ argument transfer] +$LOCALS=$FRAME-$FRAME_MARKER; +$FRAME+=32; # local variables + +$tp="%r31"; +$ti1="%r29"; +$ti0="%r28"; + +$rp="%r26"; +$ap="%r25"; +$bp="%r24"; +$np="%r23"; +$n0="%r22"; # passed through stack in 32-bit +$num="%r21"; # passed through stack in 32-bit +$idx="%r20"; +$arrsz="%r19"; + +$nm1="%r7"; +$nm0="%r6"; +$ab1="%r5"; +$ab0="%r4"; + +$fp="%r3"; +$hi1="%r2"; +$hi0="%r1"; + +$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s + +$fm0="%fr4"; $fti=$fm0; +$fbi="%fr5L"; +$fn0="%fr5R"; +$fai="%fr6"; $fab0="%fr7"; $fab1="%fr8"; +$fni="%fr9"; $fnm0="%fr10"; $fnm1="%fr11"; + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT bn_mul_mont,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR + .ALIGN 64 +bn_mul_mont + .PROC + .CALLINFO FRAME=`$FRAME-8*$SIZE_T`,NO_CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=6 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + ldo -$FRAME(%sp),$fp +___ +$code.=<<___ if ($SIZE_T==4); + ldw `-$FRAME_MARKER-4`($fp),$n0 + ldw `-$FRAME_MARKER-8`($fp),$num + nop + nop ; alignment +___ +$code.=<<___ if ($BN_SZ==4); + comiclr,<= 6,$num,%r0 ; are vectors long enough? + b L\$abort + ldi 0,%r28 ; signal "unhandled" + add,ev %r0,$num,$num ; is $num even? + b L\$abort + nop + or $ap,$np,$ti1 + extru,= $ti1,31,3,%r0 ; are ap and np 64-bit aligned? + b L\$abort + nop + nop ; alignment + nop + + fldws 0($n0),${fn0} + fldws,ma 4($bp),${fbi} ; bp[0] +___ +$code.=<<___ if ($BN_SZ==8); + comib,> 3,$num,L\$abort ; are vectors long enough? + ldi 0,%r28 ; signal "unhandled" + addl $num,$num,$num ; I operate on 32-bit values + + fldws 4($n0),${fn0} ; only low part of n0 + fldws 4($bp),${fbi} ; bp[0] in flipped word order +___ +$code.=<<___; + fldds 0($ap),${fai} ; ap[0,1] + fldds 0($np),${fni} ; np[0,1] + + sh2addl $num,%r0,$arrsz + ldi 31,$hi0 + ldo 36($arrsz),$hi1 ; space for tp[num+1] + andcm $hi1,$hi0,$hi1 ; align + addl $hi1,%sp,%sp + $PUSH $fp,-$SIZE_T(%sp) + + ldo `$LOCALS+16`($fp),$xfer + ldo `$LOCALS+32+4`($fp),$tp + + xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[0] + xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[0] + xmpyu ${fn0},${fab0}R,${fm0} + + addl $arrsz,$ap,$ap ; point at the end + addl $arrsz,$np,$np + subi 0,$arrsz,$idx ; j=0 + ldo 8($idx),$idx ; j++++ + + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m + fstds ${fab0},-16($xfer) + fstds ${fnm0},-8($xfer) + fstds ${fab1},0($xfer) + fstds ${fnm1},8($xfer) + flddx $idx($ap),${fai} ; ap[2,3] + flddx $idx($np),${fni} ; np[2,3] +___ +$code.=<<___ if ($BN_SZ==4); + mtctl $hi0,%cr11 ; $hi0 still holds 31 + extrd,u,*= $hi0,%sar,1,$hi0 ; executes on PA-RISC 1.0 + b L\$parisc11 + nop +___ +$code.=<<___; # PA-RISC 2.0 code-path + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldd -16($xfer),$ab0 + fstds ${fab0},-16($xfer) + + extrd,u $ab0,31,32,$hi0 + extrd,u $ab0,63,32,$ab0 + ldd -8($xfer),$nm0 + fstds ${fnm0},-8($xfer) + ldo 8($idx),$idx ; j++++ + addl $ab0,$nm0,$nm0 ; low part is discarded + extrd,u $nm0,31,32,$hi1 + +L\$1st + xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m + ldd 0($xfer),$ab1 + fstds ${fab1},0($xfer) + addl $hi0,$ab1,$ab1 + extrd,u $ab1,31,32,$hi0 + ldd 8($xfer),$nm1 + fstds ${fnm1},8($xfer) + extrd,u $ab1,63,32,$ab1 + addl $hi1,$nm1,$nm1 + flddx $idx($ap),${fai} ; ap[j,j+1] + flddx $idx($np),${fni} ; np[j,j+1] + addl $ab1,$nm1,$nm1 + extrd,u $nm1,31,32,$hi1 + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldd -16($xfer),$ab0 + fstds ${fab0},-16($xfer) + addl $hi0,$ab0,$ab0 + extrd,u $ab0,31,32,$hi0 + ldd -8($xfer),$nm0 + fstds ${fnm0},-8($xfer) + extrd,u $ab0,63,32,$ab0 + addl $hi1,$nm0,$nm0 + stw $nm1,-4($tp) ; tp[j-1] + addl $ab0,$nm0,$nm0 + stw,ma $nm0,8($tp) ; tp[j-1] + addib,<> 8,$idx,L\$1st ; j++++ + extrd,u $nm0,31,32,$hi1 + + xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m + ldd 0($xfer),$ab1 + fstds ${fab1},0($xfer) + addl $hi0,$ab1,$ab1 + extrd,u $ab1,31,32,$hi0 + ldd 8($xfer),$nm1 + fstds ${fnm1},8($xfer) + extrd,u $ab1,63,32,$ab1 + addl $hi1,$nm1,$nm1 + ldd -16($xfer),$ab0 + addl $ab1,$nm1,$nm1 + ldd -8($xfer),$nm0 + extrd,u $nm1,31,32,$hi1 + + addl $hi0,$ab0,$ab0 + extrd,u $ab0,31,32,$hi0 + stw $nm1,-4($tp) ; tp[j-1] + extrd,u $ab0,63,32,$ab0 + addl $hi1,$nm0,$nm0 + ldd 0($xfer),$ab1 + addl $ab0,$nm0,$nm0 + ldd,mb 8($xfer),$nm1 + extrd,u $nm0,31,32,$hi1 + stw,ma $nm0,8($tp) ; tp[j-1] + + ldo -1($num),$num ; i-- + subi 0,$arrsz,$idx ; j=0 +___ +$code.=<<___ if ($BN_SZ==4); + fldws,ma 4($bp),${fbi} ; bp[1] +___ +$code.=<<___ if ($BN_SZ==8); + fldws 0($bp),${fbi} ; bp[1] in flipped word order +___ +$code.=<<___; + flddx $idx($ap),${fai} ; ap[0,1] + flddx $idx($np),${fni} ; np[0,1] + fldws 8($xfer),${fti}R ; tp[0] + addl $hi0,$ab1,$ab1 + extrd,u $ab1,31,32,$hi0 + extrd,u $ab1,63,32,$ab1 + ldo 8($idx),$idx ; j++++ + xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1] + xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1] + addl $hi1,$nm1,$nm1 + addl $ab1,$nm1,$nm1 + extrd,u $nm1,31,32,$hi1 + fstws,mb ${fab0}L,-8($xfer) ; save high part + stw $nm1,-4($tp) ; tp[j-1] + + fcpy,sgl %fr0,${fti}L ; zero high part + fcpy,sgl %fr0,${fab0}L + addl $hi1,$hi0,$hi0 + extrd,u $hi0,31,32,$hi1 + fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double + fcnvxf,dbl,dbl ${fab0},${fab0} + stw $hi0,0($tp) + stw $hi1,4($tp) + + fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] + fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int + xmpyu ${fn0},${fab0}R,${fm0} + ldo `$LOCALS+32+4`($fp),$tp +L\$outer + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m + fstds ${fab0},-16($xfer) ; 33-bit value + fstds ${fnm0},-8($xfer) + flddx $idx($ap),${fai} ; ap[2] + flddx $idx($np),${fni} ; np[2] + ldo 8($idx),$idx ; j++++ + ldd -16($xfer),$ab0 ; 33-bit value + ldd -8($xfer),$nm0 + ldw 0($xfer),$hi0 ; high part + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + extrd,u $ab0,31,32,$ti0 ; carry bit + extrd,u $ab0,63,32,$ab0 + fstds ${fab1},0($xfer) + addl $ti0,$hi0,$hi0 ; account carry bit + fstds ${fnm1},8($xfer) + addl $ab0,$nm0,$nm0 ; low part is discarded + ldw 0($tp),$ti1 ; tp[1] + extrd,u $nm0,31,32,$hi1 + fstds ${fab0},-16($xfer) + fstds ${fnm0},-8($xfer) + +L\$inner + xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m + ldd 0($xfer),$ab1 + fstds ${fab1},0($xfer) + addl $hi0,$ti1,$ti1 + addl $ti1,$ab1,$ab1 + ldd 8($xfer),$nm1 + fstds ${fnm1},8($xfer) + extrd,u $ab1,31,32,$hi0 + extrd,u $ab1,63,32,$ab1 + flddx $idx($ap),${fai} ; ap[j,j+1] + flddx $idx($np),${fni} ; np[j,j+1] + addl $hi1,$nm1,$nm1 + addl $ab1,$nm1,$nm1 + ldw 4($tp),$ti0 ; tp[j] + stw $nm1,-4($tp) ; tp[j-1] + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldd -16($xfer),$ab0 + fstds ${fab0},-16($xfer) + addl $hi0,$ti0,$ti0 + addl $ti0,$ab0,$ab0 + ldd -8($xfer),$nm0 + fstds ${fnm0},-8($xfer) + extrd,u $ab0,31,32,$hi0 + extrd,u $nm1,31,32,$hi1 + ldw 8($tp),$ti1 ; tp[j] + extrd,u $ab0,63,32,$ab0 + addl $hi1,$nm0,$nm0 + addl $ab0,$nm0,$nm0 + stw,ma $nm0,8($tp) ; tp[j-1] + addib,<> 8,$idx,L\$inner ; j++++ + extrd,u $nm0,31,32,$hi1 + + xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m + ldd 0($xfer),$ab1 + fstds ${fab1},0($xfer) + addl $hi0,$ti1,$ti1 + addl $ti1,$ab1,$ab1 + ldd 8($xfer),$nm1 + fstds ${fnm1},8($xfer) + extrd,u $ab1,31,32,$hi0 + extrd,u $ab1,63,32,$ab1 + ldw 4($tp),$ti0 ; tp[j] + addl $hi1,$nm1,$nm1 + addl $ab1,$nm1,$nm1 + ldd -16($xfer),$ab0 + ldd -8($xfer),$nm0 + extrd,u $nm1,31,32,$hi1 + + addl $hi0,$ab0,$ab0 + addl $ti0,$ab0,$ab0 + stw $nm1,-4($tp) ; tp[j-1] + extrd,u $ab0,31,32,$hi0 + ldw 8($tp),$ti1 ; tp[j] + extrd,u $ab0,63,32,$ab0 + addl $hi1,$nm0,$nm0 + ldd 0($xfer),$ab1 + addl $ab0,$nm0,$nm0 + ldd,mb 8($xfer),$nm1 + extrd,u $nm0,31,32,$hi1 + stw,ma $nm0,8($tp) ; tp[j-1] + + addib,= -1,$num,L\$outerdone ; i-- + subi 0,$arrsz,$idx ; j=0 +___ +$code.=<<___ if ($BN_SZ==4); + fldws,ma 4($bp),${fbi} ; bp[i] +___ +$code.=<<___ if ($BN_SZ==8); + ldi 12,$ti0 ; bp[i] in flipped word order + addl,ev %r0,$num,$num + ldi -4,$ti0 + addl $ti0,$bp,$bp + fldws 0($bp),${fbi} +___ +$code.=<<___; + flddx $idx($ap),${fai} ; ap[0] + addl $hi0,$ab1,$ab1 + flddx $idx($np),${fni} ; np[0] + fldws 8($xfer),${fti}R ; tp[0] + addl $ti1,$ab1,$ab1 + extrd,u $ab1,31,32,$hi0 + extrd,u $ab1,63,32,$ab1 + + ldo 8($idx),$idx ; j++++ + xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i] + xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i] + ldw 4($tp),$ti0 ; tp[j] + + addl $hi1,$nm1,$nm1 + fstws,mb ${fab0}L,-8($xfer) ; save high part + addl $ab1,$nm1,$nm1 + extrd,u $nm1,31,32,$hi1 + fcpy,sgl %fr0,${fti}L ; zero high part + fcpy,sgl %fr0,${fab0}L + stw $nm1,-4($tp) ; tp[j-1] + + fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double + fcnvxf,dbl,dbl ${fab0},${fab0} + addl $hi1,$hi0,$hi0 + fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] + addl $ti0,$hi0,$hi0 + extrd,u $hi0,31,32,$hi1 + fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int + stw $hi0,0($tp) + stw $hi1,4($tp) + xmpyu ${fn0},${fab0}R,${fm0} + + b L\$outer + ldo `$LOCALS+32+4`($fp),$tp + +L\$outerdone + addl $hi0,$ab1,$ab1 + addl $ti1,$ab1,$ab1 + extrd,u $ab1,31,32,$hi0 + extrd,u $ab1,63,32,$ab1 + + ldw 4($tp),$ti0 ; tp[j] + + addl $hi1,$nm1,$nm1 + addl $ab1,$nm1,$nm1 + extrd,u $nm1,31,32,$hi1 + stw $nm1,-4($tp) ; tp[j-1] + + addl $hi1,$hi0,$hi0 + addl $ti0,$hi0,$hi0 + extrd,u $hi0,31,32,$hi1 + stw $hi0,0($tp) + stw $hi1,4($tp) + + ldo `$LOCALS+32`($fp),$tp + sub %r0,%r0,%r0 ; clear borrow +___ +$code.=<<___ if ($BN_SZ==4); + ldws,ma 4($tp),$ti0 + extru,= $rp,31,3,%r0 ; is rp 64-bit aligned? + b L\$sub_pa11 + addl $tp,$arrsz,$tp +L\$sub + ldwx $idx($np),$hi0 + subb $ti0,$hi0,$hi1 + ldwx $idx($tp),$ti0 + addib,<> 4,$idx,L\$sub + stws,ma $hi1,4($rp) + + subb $ti0,%r0,$hi1 + ldo -4($tp),$tp +___ +$code.=<<___ if ($BN_SZ==8); + ldd,ma 8($tp),$ti0 +L\$sub + ldd $idx($np),$hi0 + shrpd $ti0,$ti0,32,$ti0 ; flip word order + std $ti0,-8($tp) ; save flipped value + sub,db $ti0,$hi0,$hi1 + ldd,ma 8($tp),$ti0 + addib,<> 8,$idx,L\$sub + std,ma $hi1,8($rp) + + extrd,u $ti0,31,32,$ti0 ; carry in flipped word order + sub,db $ti0,%r0,$hi1 + ldo -8($tp),$tp +___ +$code.=<<___; + and $tp,$hi1,$ap + andcm $rp,$hi1,$bp + or $ap,$bp,$np + + sub $rp,$arrsz,$rp ; rewind rp + subi 0,$arrsz,$idx + ldo `$LOCALS+32`($fp),$tp +L\$copy + ldd $idx($np),$hi0 + std,ma %r0,8($tp) + addib,<> 8,$idx,.-8 ; L\$copy + std,ma $hi0,8($rp) +___ + +if ($BN_SZ==4) { # PA-RISC 1.1 code-path +$ablo=$ab0; +$abhi=$ab1; +$nmlo0=$nm0; +$nmhi0=$nm1; +$nmlo1="%r9"; +$nmhi1="%r8"; + +$code.=<<___; + b L\$done + nop + + .ALIGN 8 +L\$parisc11 + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldw -12($xfer),$ablo + ldw -16($xfer),$hi0 + ldw -4($xfer),$nmlo0 + ldw -8($xfer),$nmhi0 + fstds ${fab0},-16($xfer) + fstds ${fnm0},-8($xfer) + + ldo 8($idx),$idx ; j++++ + add $ablo,$nmlo0,$nmlo0 ; discarded + addc %r0,$nmhi0,$hi1 + ldw 4($xfer),$ablo + ldw 0($xfer),$abhi + nop + +L\$1st_pa11 + xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] + flddx $idx($ap),${fai} ; ap[j,j+1] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m + flddx $idx($np),${fni} ; np[j,j+1] + add $hi0,$ablo,$ablo + ldw 12($xfer),$nmlo1 + addc %r0,$abhi,$hi0 + ldw 8($xfer),$nmhi1 + add $ablo,$nmlo1,$nmlo1 + fstds ${fab1},0($xfer) + addc %r0,$nmhi1,$nmhi1 + fstds ${fnm1},8($xfer) + add $hi1,$nmlo1,$nmlo1 + ldw -12($xfer),$ablo + addc %r0,$nmhi1,$hi1 + ldw -16($xfer),$abhi + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] + ldw -4($xfer),$nmlo0 + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldw -8($xfer),$nmhi0 + add $hi0,$ablo,$ablo + stw $nmlo1,-4($tp) ; tp[j-1] + addc %r0,$abhi,$hi0 + fstds ${fab0},-16($xfer) + add $ablo,$nmlo0,$nmlo0 + fstds ${fnm0},-8($xfer) + addc %r0,$nmhi0,$nmhi0 + ldw 0($xfer),$abhi + add $hi1,$nmlo0,$nmlo0 + ldw 4($xfer),$ablo + stws,ma $nmlo0,8($tp) ; tp[j-1] + addib,<> 8,$idx,L\$1st_pa11 ; j++++ + addc %r0,$nmhi0,$hi1 + + ldw 8($xfer),$nmhi1 + ldw 12($xfer),$nmlo1 + xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m + add $hi0,$ablo,$ablo + fstds ${fab1},0($xfer) + addc %r0,$abhi,$hi0 + fstds ${fnm1},8($xfer) + add $ablo,$nmlo1,$nmlo1 + ldw -16($xfer),$abhi + addc %r0,$nmhi1,$nmhi1 + ldw -12($xfer),$ablo + add $hi1,$nmlo1,$nmlo1 + ldw -8($xfer),$nmhi0 + addc %r0,$nmhi1,$hi1 + ldw -4($xfer),$nmlo0 + + add $hi0,$ablo,$ablo + stw $nmlo1,-4($tp) ; tp[j-1] + addc %r0,$abhi,$hi0 + ldw 0($xfer),$abhi + add $ablo,$nmlo0,$nmlo0 + ldw 4($xfer),$ablo + addc %r0,$nmhi0,$nmhi0 + ldws,mb 8($xfer),$nmhi1 + add $hi1,$nmlo0,$nmlo0 + ldw 4($xfer),$nmlo1 + addc %r0,$nmhi0,$hi1 + stws,ma $nmlo0,8($tp) ; tp[j-1] + + ldo -1($num),$num ; i-- + subi 0,$arrsz,$idx ; j=0 + + fldws,ma 4($bp),${fbi} ; bp[1] + flddx $idx($ap),${fai} ; ap[0,1] + flddx $idx($np),${fni} ; np[0,1] + fldws 8($xfer),${fti}R ; tp[0] + add $hi0,$ablo,$ablo + addc %r0,$abhi,$hi0 + ldo 8($idx),$idx ; j++++ + xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1] + xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1] + add $hi1,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$nmhi1 + add $ablo,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$hi1 + fstws,mb ${fab0}L,-8($xfer) ; save high part + stw $nmlo1,-4($tp) ; tp[j-1] + + fcpy,sgl %fr0,${fti}L ; zero high part + fcpy,sgl %fr0,${fab0}L + add $hi1,$hi0,$hi0 + addc %r0,%r0,$hi1 + fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double + fcnvxf,dbl,dbl ${fab0},${fab0} + stw $hi0,0($tp) + stw $hi1,4($tp) + + fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] + fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int + xmpyu ${fn0},${fab0}R,${fm0} + ldo `$LOCALS+32+4`($fp),$tp +L\$outer_pa11 + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m + fstds ${fab0},-16($xfer) ; 33-bit value + fstds ${fnm0},-8($xfer) + flddx $idx($ap),${fai} ; ap[2,3] + flddx $idx($np),${fni} ; np[2,3] + ldw -16($xfer),$abhi ; carry bit actually + ldo 8($idx),$idx ; j++++ + ldw -12($xfer),$ablo + ldw -8($xfer),$nmhi0 + ldw -4($xfer),$nmlo0 + ldw 0($xfer),$hi0 ; high part + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + fstds ${fab1},0($xfer) + addl $abhi,$hi0,$hi0 ; account carry bit + fstds ${fnm1},8($xfer) + add $ablo,$nmlo0,$nmlo0 ; discarded + ldw 0($tp),$ti1 ; tp[1] + addc %r0,$nmhi0,$hi1 + fstds ${fab0},-16($xfer) + fstds ${fnm0},-8($xfer) + ldw 4($xfer),$ablo + ldw 0($xfer),$abhi + +L\$inner_pa11 + xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] + flddx $idx($ap),${fai} ; ap[j,j+1] + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m + flddx $idx($np),${fni} ; np[j,j+1] + add $hi0,$ablo,$ablo + ldw 4($tp),$ti0 ; tp[j] + addc %r0,$abhi,$abhi + ldw 12($xfer),$nmlo1 + add $ti1,$ablo,$ablo + ldw 8($xfer),$nmhi1 + addc %r0,$abhi,$hi0 + fstds ${fab1},0($xfer) + add $ablo,$nmlo1,$nmlo1 + fstds ${fnm1},8($xfer) + addc %r0,$nmhi1,$nmhi1 + ldw -12($xfer),$ablo + add $hi1,$nmlo1,$nmlo1 + ldw -16($xfer),$abhi + addc %r0,$nmhi1,$hi1 + + xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] + ldw 8($tp),$ti1 ; tp[j] + xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m + ldw -4($xfer),$nmlo0 + add $hi0,$ablo,$ablo + ldw -8($xfer),$nmhi0 + addc %r0,$abhi,$abhi + stw $nmlo1,-4($tp) ; tp[j-1] + add $ti0,$ablo,$ablo + fstds ${fab0},-16($xfer) + addc %r0,$abhi,$hi0 + fstds ${fnm0},-8($xfer) + add $ablo,$nmlo0,$nmlo0 + ldw 4($xfer),$ablo + addc %r0,$nmhi0,$nmhi0 + ldw 0($xfer),$abhi + add $hi1,$nmlo0,$nmlo0 + stws,ma $nmlo0,8($tp) ; tp[j-1] + addib,<> 8,$idx,L\$inner_pa11 ; j++++ + addc %r0,$nmhi0,$hi1 + + xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i] + ldw 12($xfer),$nmlo1 + xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m + ldw 8($xfer),$nmhi1 + add $hi0,$ablo,$ablo + ldw 4($tp),$ti0 ; tp[j] + addc %r0,$abhi,$abhi + fstds ${fab1},0($xfer) + add $ti1,$ablo,$ablo + fstds ${fnm1},8($xfer) + addc %r0,$abhi,$hi0 + ldw -16($xfer),$abhi + add $ablo,$nmlo1,$nmlo1 + ldw -12($xfer),$ablo + addc %r0,$nmhi1,$nmhi1 + ldw -8($xfer),$nmhi0 + add $hi1,$nmlo1,$nmlo1 + ldw -4($xfer),$nmlo0 + addc %r0,$nmhi1,$hi1 + + add $hi0,$ablo,$ablo + stw $nmlo1,-4($tp) ; tp[j-1] + addc %r0,$abhi,$abhi + add $ti0,$ablo,$ablo + ldw 8($tp),$ti1 ; tp[j] + addc %r0,$abhi,$hi0 + ldw 0($xfer),$abhi + add $ablo,$nmlo0,$nmlo0 + ldw 4($xfer),$ablo + addc %r0,$nmhi0,$nmhi0 + ldws,mb 8($xfer),$nmhi1 + add $hi1,$nmlo0,$nmlo0 + ldw 4($xfer),$nmlo1 + addc %r0,$nmhi0,$hi1 + stws,ma $nmlo0,8($tp) ; tp[j-1] + + addib,= -1,$num,L\$outerdone_pa11; i-- + subi 0,$arrsz,$idx ; j=0 + + fldws,ma 4($bp),${fbi} ; bp[i] + flddx $idx($ap),${fai} ; ap[0] + add $hi0,$ablo,$ablo + addc %r0,$abhi,$abhi + flddx $idx($np),${fni} ; np[0] + fldws 8($xfer),${fti}R ; tp[0] + add $ti1,$ablo,$ablo + addc %r0,$abhi,$hi0 + + ldo 8($idx),$idx ; j++++ + xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i] + xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i] + ldw 4($tp),$ti0 ; tp[j] + + add $hi1,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$nmhi1 + fstws,mb ${fab0}L,-8($xfer) ; save high part + add $ablo,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$hi1 + fcpy,sgl %fr0,${fti}L ; zero high part + fcpy,sgl %fr0,${fab0}L + stw $nmlo1,-4($tp) ; tp[j-1] + + fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double + fcnvxf,dbl,dbl ${fab0},${fab0} + add $hi1,$hi0,$hi0 + addc %r0,%r0,$hi1 + fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] + add $ti0,$hi0,$hi0 + addc %r0,$hi1,$hi1 + fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int + stw $hi0,0($tp) + stw $hi1,4($tp) + xmpyu ${fn0},${fab0}R,${fm0} + + b L\$outer_pa11 + ldo `$LOCALS+32+4`($fp),$tp + +L\$outerdone_pa11 + add $hi0,$ablo,$ablo + addc %r0,$abhi,$abhi + add $ti1,$ablo,$ablo + addc %r0,$abhi,$hi0 + + ldw 4($tp),$ti0 ; tp[j] + + add $hi1,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$nmhi1 + add $ablo,$nmlo1,$nmlo1 + addc %r0,$nmhi1,$hi1 + stw $nmlo1,-4($tp) ; tp[j-1] + + add $hi1,$hi0,$hi0 + addc %r0,%r0,$hi1 + add $ti0,$hi0,$hi0 + addc %r0,$hi1,$hi1 + stw $hi0,0($tp) + stw $hi1,4($tp) + + ldo `$LOCALS+32+4`($fp),$tp + sub %r0,%r0,%r0 ; clear borrow + ldw -4($tp),$ti0 + addl $tp,$arrsz,$tp +L\$sub_pa11 + ldwx $idx($np),$hi0 + subb $ti0,$hi0,$hi1 + ldwx $idx($tp),$ti0 + addib,<> 4,$idx,L\$sub_pa11 + stws,ma $hi1,4($rp) + + subb $ti0,%r0,$hi1 + ldo -4($tp),$tp + and $tp,$hi1,$ap + andcm $rp,$hi1,$bp + or $ap,$bp,$np + + sub $rp,$arrsz,$rp ; rewind rp + subi 0,$arrsz,$idx + ldo `$LOCALS+32`($fp),$tp +L\$copy_pa11 + ldwx $idx($np),$hi0 + stws,ma %r0,4($tp) + addib,<> 4,$idx,L\$copy_pa11 + stws,ma $hi0,4($rp) + + nop ; alignment +L\$done +___ +} + +$code.=<<___; + ldi 1,%r28 ; signal "handled" + ldo $FRAME($fp),%sp ; destroy tp[num+1] + + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 +L\$abort + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + .STRINGZ "Montgomery Multiplication for PA-RISC, CRYPTOGAMS by " +___ + +# Explicitly encode PA-RISC 2.0 instructions used in this module, so +# that it can be compiled with .LEVEL 1.0. It should be noted that I +# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 +# directive... + +my $ldd = sub { + my ($mod,$args) = @_; + my $orig = "ldd$mod\t$args"; + + if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4 + { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3; + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5 + { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3; + $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset + $opcode|=(1<<5) if ($mod =~ /^,m/); + $opcode|=(1<<13) if ($mod =~ /^,mb/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $std = sub { + my ($mod,$args) = @_; + my $orig = "std$mod\t$args"; + + if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 6 + { my $opcode=(0x03<<26)|($3<<21)|($1<<16)|(1<<12)|(0xB<<6); + $opcode|=(($2&0xF)<<1)|(($2&0x10)>>4); # encode offset + $opcode|=(1<<5) if ($mod =~ /^,m/); + $opcode|=(1<<13) if ($mod =~ /^,mb/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $extrd = sub { + my ($mod,$args) = @_; + my $orig = "extrd$mod\t$args"; + + # I only have ",u" completer, it's implicitly encoded... + if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 + { my $opcode=(0x36<<26)|($1<<21)|($4<<16); + my $len=32-$3; + $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos + $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 + { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); + my $len=32-$2; + $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len + $opcode |= (1<<13) if ($mod =~ /,\**=/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $shrpd = sub { + my ($mod,$args) = @_; + my $orig = "shrpd$mod\t$args"; + + if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 + { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; + my $cpos=63-$3; + $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $sub = sub { + my ($mod,$args) = @_; + my $orig = "sub$mod\t$args"; + + if ($mod eq ",db" && $args =~ /%r([0-9]+),%r([0-9]+),%r([0-9]+)/) { + my $opcode=(0x02<<26)|($2<<21)|($1<<16)|$3; + $opcode|=(1<<10); # e1 + $opcode|=(1<<8); # e2 + $opcode|=(1<<5); # d + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig + } + else { "\t".$orig; } +}; + +sub assemble { + my ($mnemonic,$mod,$args)=@_; + my $opcode = eval("\$$mnemonic"); + + ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + # flip word order in 64-bit mode... + s/(xmpyu\s+)($fai|$fni)([LR])/$1.$2.($3 eq "L"?"R":"L")/e if ($BN_SZ==8); + # assemble 2.0 instructions in 32-bit mode... + s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); + + s/\bbv\b/bve/gm if ($SIZE_T==8); + + print $_,"\n"; +} +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/ppc-mont.pl b/app/openssl/crypto/bn/asm/ppc-mont.pl index 7849eae9..f9b6992c 100644 --- a/app/openssl/crypto/bn/asm/ppc-mont.pl +++ b/app/openssl/crypto/bn/asm/ppc-mont.pl @@ -31,7 +31,6 @@ if ($flavour =~ /32/) { $BNSZ= $BITS/8; $SIZE_T=4; $RZONE= 224; - $FRAME= $SIZE_T*16; $LD= "lwz"; # load $LDU= "lwzu"; # load and update @@ -51,7 +50,6 @@ if ($flavour =~ /32/) { $BNSZ= $BITS/8; $SIZE_T=8; $RZONE= 288; - $FRAME= $SIZE_T*16; # same as above, but 64-bit mnemonics... $LD= "ld"; # load @@ -69,6 +67,9 @@ if ($flavour =~ /32/) { $POP= $LD; } else { die "nonsense $flavour"; } +$FRAME=8*$SIZE_T+$RZONE; +$LOCALS=8*$SIZE_T; + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -89,18 +90,18 @@ $aj="r10"; $nj="r11"; $tj="r12"; # non-volatile registers -$i="r14"; -$j="r15"; -$tp="r16"; -$m0="r17"; -$m1="r18"; -$lo0="r19"; -$hi0="r20"; -$lo1="r21"; -$hi1="r22"; -$alo="r23"; -$ahi="r24"; -$nlo="r25"; +$i="r20"; +$j="r21"; +$tp="r22"; +$m0="r23"; +$m1="r24"; +$lo0="r25"; +$hi0="r26"; +$lo1="r27"; +$hi1="r28"; +$alo="r29"; +$ahi="r30"; +$nlo="r31"; # $nhi="r0"; @@ -108,42 +109,48 @@ $code=<<___; .machine "any" .text -.globl .bn_mul_mont +.globl .bn_mul_mont_int .align 4 -.bn_mul_mont: +.bn_mul_mont_int: cmpwi $num,4 mr $rp,r3 ; $rp is reassigned li r3,0 bltlr - +___ +$code.=<<___ if ($BNSZ==4); + cmpwi $num,32 ; longer key performance is not better + bgelr +___ +$code.=<<___; slwi $num,$num,`log($BNSZ)/log(2)` li $tj,-4096 - addi $ovf,$num,`$FRAME+$RZONE` + addi $ovf,$num,$FRAME subf $ovf,$ovf,$sp ; $sp-$ovf and $ovf,$ovf,$tj ; minimize TLB usage subf $ovf,$sp,$ovf ; $ovf-$sp + mr $tj,$sp srwi $num,$num,`log($BNSZ)/log(2)` $STUX $sp,$sp,$ovf - $PUSH r14,`4*$SIZE_T`($sp) - $PUSH r15,`5*$SIZE_T`($sp) - $PUSH r16,`6*$SIZE_T`($sp) - $PUSH r17,`7*$SIZE_T`($sp) - $PUSH r18,`8*$SIZE_T`($sp) - $PUSH r19,`9*$SIZE_T`($sp) - $PUSH r20,`10*$SIZE_T`($sp) - $PUSH r21,`11*$SIZE_T`($sp) - $PUSH r22,`12*$SIZE_T`($sp) - $PUSH r23,`13*$SIZE_T`($sp) - $PUSH r24,`14*$SIZE_T`($sp) - $PUSH r25,`15*$SIZE_T`($sp) + $PUSH r20,`-12*$SIZE_T`($tj) + $PUSH r21,`-11*$SIZE_T`($tj) + $PUSH r22,`-10*$SIZE_T`($tj) + $PUSH r23,`-9*$SIZE_T`($tj) + $PUSH r24,`-8*$SIZE_T`($tj) + $PUSH r25,`-7*$SIZE_T`($tj) + $PUSH r26,`-6*$SIZE_T`($tj) + $PUSH r27,`-5*$SIZE_T`($tj) + $PUSH r28,`-4*$SIZE_T`($tj) + $PUSH r29,`-3*$SIZE_T`($tj) + $PUSH r30,`-2*$SIZE_T`($tj) + $PUSH r31,`-1*$SIZE_T`($tj) $LD $n0,0($n0) ; pull n0[0] value addi $num,$num,-2 ; adjust $num for counter register $LD $m0,0($bp) ; m0=bp[0] $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$FRAME + addi $tp,$sp,$LOCALS $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0] $UMULH $hi0,$aj,$m0 @@ -205,8 +212,8 @@ L1st: Louter: $LDX $m0,$bp,$i ; m0=bp[i] $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$FRAME - $LD $tj,$FRAME($sp) ; tp[0] + addi $tp,$sp,$LOCALS + $LD $tj,$LOCALS($sp); tp[0] $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i] $UMULH $hi0,$aj,$m0 $LD $aj,$BNSZ($ap) ; ap[1] @@ -273,7 +280,7 @@ Linner: addi $num,$num,2 ; restore $num subfc $j,$j,$j ; j=0 and "clear" XER[CA] - addi $tp,$sp,$FRAME + addi $tp,$sp,$LOCALS mtctr $num .align 4 @@ -299,23 +306,27 @@ Lcopy: ; copy or in-place refresh addi $j,$j,$BNSZ bdnz- Lcopy - $POP r14,`4*$SIZE_T`($sp) - $POP r15,`5*$SIZE_T`($sp) - $POP r16,`6*$SIZE_T`($sp) - $POP r17,`7*$SIZE_T`($sp) - $POP r18,`8*$SIZE_T`($sp) - $POP r19,`9*$SIZE_T`($sp) - $POP r20,`10*$SIZE_T`($sp) - $POP r21,`11*$SIZE_T`($sp) - $POP r22,`12*$SIZE_T`($sp) - $POP r23,`13*$SIZE_T`($sp) - $POP r24,`14*$SIZE_T`($sp) - $POP r25,`15*$SIZE_T`($sp) - $POP $sp,0($sp) + $POP $tj,0($sp) li r3,1 + $POP r20,`-12*$SIZE_T`($tj) + $POP r21,`-11*$SIZE_T`($tj) + $POP r22,`-10*$SIZE_T`($tj) + $POP r23,`-9*$SIZE_T`($tj) + $POP r24,`-8*$SIZE_T`($tj) + $POP r25,`-7*$SIZE_T`($tj) + $POP r26,`-6*$SIZE_T`($tj) + $POP r27,`-5*$SIZE_T`($tj) + $POP r28,`-4*$SIZE_T`($tj) + $POP r29,`-3*$SIZE_T`($tj) + $POP r30,`-2*$SIZE_T`($tj) + $POP r31,`-1*$SIZE_T`($tj) + mr $sp,$tj blr .long 0 -.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " + .byte 0,12,4,0,0x80,12,6,0 + .long 0 + +.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/bn/asm/ppc.pl b/app/openssl/crypto/bn/asm/ppc.pl index 37c65d35..1249ce22 100644 --- a/app/openssl/crypto/bn/asm/ppc.pl +++ b/app/openssl/crypto/bn/asm/ppc.pl @@ -389,7 +389,9 @@ $data=<+-------------------------------+ # | saved sp | # +-------------------------------+ -# | | -# +-------------------------------+ -# | 10 saved gpr, r14-r23 | -# . . -# . . -# +12*size_t +-------------------------------+ -# | 12 saved fpr, f14-f25 | # . . -# . . -# +12*8 +-------------------------------+ -# | padding to 64 byte boundary | -# . . -# +X +-------------------------------+ +# +64 +-------------------------------+ # | 16 gpr<->fpr transfer zone | # . . # . . @@ -173,6 +179,16 @@ $T3a="f24"; $T3b="f25"; # . . # . . # +-------------------------------+ +# . . +# -12*size_t +-------------------------------+ +# | 10 saved gpr, r22-r31 | +# . . +# . . +# -12*8 +-------------------------------+ +# | 12 saved fpr, f20-f31 | +# . . +# . . +# +-------------------------------+ $code=<<___; .machine "any" @@ -181,14 +197,14 @@ $code=<<___; .globl .$fname .align 5 .$fname: - cmpwi $num,4 + cmpwi $num,`3*8/$SIZE_T` mr $rp,r3 ; $rp is reassigned li r3,0 ; possible "not handled" return code bltlr- - andi. r0,$num,1 ; $num has to be even + andi. r0,$num,`16/$SIZE_T-1` ; $num has to be "even" bnelr- - slwi $num,$num,3 ; num*=8 + slwi $num,$num,`log($SIZE_T)/log(2)` ; num*=sizeof(BN_LONG) li $i,-4096 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num add $tp,$tp,$num ; place for tp[num+1] @@ -196,35 +212,50 @@ $code=<<___; subf $tp,$tp,$sp ; $sp-$tp and $tp,$tp,$i ; minimize TLB usage subf $tp,$sp,$tp ; $tp-$sp + mr $i,$sp $STUX $sp,$sp,$tp ; alloca - $PUSH r14,`2*$SIZE_T`($sp) - $PUSH r15,`3*$SIZE_T`($sp) - $PUSH r16,`4*$SIZE_T`($sp) - $PUSH r17,`5*$SIZE_T`($sp) - $PUSH r18,`6*$SIZE_T`($sp) - $PUSH r19,`7*$SIZE_T`($sp) - $PUSH r20,`8*$SIZE_T`($sp) - $PUSH r21,`9*$SIZE_T`($sp) - $PUSH r22,`10*$SIZE_T`($sp) - $PUSH r23,`11*$SIZE_T`($sp) - stfd f14,`12*$SIZE_T+0`($sp) - stfd f15,`12*$SIZE_T+8`($sp) - stfd f16,`12*$SIZE_T+16`($sp) - stfd f17,`12*$SIZE_T+24`($sp) - stfd f18,`12*$SIZE_T+32`($sp) - stfd f19,`12*$SIZE_T+40`($sp) - stfd f20,`12*$SIZE_T+48`($sp) - stfd f21,`12*$SIZE_T+56`($sp) - stfd f22,`12*$SIZE_T+64`($sp) - stfd f23,`12*$SIZE_T+72`($sp) - stfd f24,`12*$SIZE_T+80`($sp) - stfd f25,`12*$SIZE_T+88`($sp) - + $PUSH r22,`-12*8-10*$SIZE_T`($i) + $PUSH r23,`-12*8-9*$SIZE_T`($i) + $PUSH r24,`-12*8-8*$SIZE_T`($i) + $PUSH r25,`-12*8-7*$SIZE_T`($i) + $PUSH r26,`-12*8-6*$SIZE_T`($i) + $PUSH r27,`-12*8-5*$SIZE_T`($i) + $PUSH r28,`-12*8-4*$SIZE_T`($i) + $PUSH r29,`-12*8-3*$SIZE_T`($i) + $PUSH r30,`-12*8-2*$SIZE_T`($i) + $PUSH r31,`-12*8-1*$SIZE_T`($i) + stfd f20,`-12*8`($i) + stfd f21,`-11*8`($i) + stfd f22,`-10*8`($i) + stfd f23,`-9*8`($i) + stfd f24,`-8*8`($i) + stfd f25,`-7*8`($i) + stfd f26,`-6*8`($i) + stfd f27,`-5*8`($i) + stfd f28,`-4*8`($i) + stfd f29,`-3*8`($i) + stfd f30,`-2*8`($i) + stfd f31,`-1*8`($i) +___ +$code.=<<___ if ($SIZE_T==8); ld $a0,0($ap) ; pull ap[0] value ld $n0,0($n0) ; pull n0[0] value ld $t3,0($bp) ; bp[0] - +___ +$code.=<<___ if ($SIZE_T==4); + mr $t1,$n0 + lwz $a0,0($ap) ; pull ap[0,1] value + lwz $t0,4($ap) + lwz $n0,0($t1) ; pull n0[0,1] value + lwz $t1,4($t1) + lwz $t3,0($bp) ; bp[0,1] + lwz $t2,4($bp) + insrdi $a0,$t0,32,0 + insrdi $n0,$t1,32,0 + insrdi $t3,$t2,32,0 +___ +$code.=<<___; addi $tp,$sp,`$FRAME+$TRANSFER+8+64` li $i,-64 add $nap_d,$tp,$num @@ -258,6 +289,8 @@ $code=<<___; std $t5,`$FRAME+40`($sp) std $t6,`$FRAME+48`($sp) std $t7,`$FRAME+56`($sp) +___ +$code.=<<___ if ($SIZE_T==8); lwz $t0,4($ap) ; load a[j] as 32-bit word pair lwz $t1,0($ap) lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair @@ -266,6 +299,18 @@ $code=<<___; lwz $t5,0($np) lwz $t6,12($np) ; load n[j+1] as 32-bit word pair lwz $t7,8($np) +___ +$code.=<<___ if ($SIZE_T==4); + lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs + lwz $t1,4($ap) + lwz $t2,8($ap) + lwz $t3,12($ap) + lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs + lwz $t5,4($np) + lwz $t6,8($np) + lwz $t7,12($np) +___ +$code.=<<___; lfd $ba,`$FRAME+0`($sp) lfd $bb,`$FRAME+8`($sp) lfd $bc,`$FRAME+16`($sp) @@ -374,6 +419,8 @@ $code=<<___; .align 5 L1st: +___ +$code.=<<___ if ($SIZE_T==8); lwz $t0,4($ap) ; load a[j] as 32-bit word pair lwz $t1,0($ap) lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair @@ -382,6 +429,18 @@ L1st: lwz $t5,0($np) lwz $t6,12($np) ; load n[j+1] as 32-bit word pair lwz $t7,8($np) +___ +$code.=<<___ if ($SIZE_T==4); + lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs + lwz $t1,4($ap) + lwz $t2,8($ap) + lwz $t3,12($ap) + lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs + lwz $t5,4($np) + lwz $t6,8($np) + lwz $t7,12($np) +___ +$code.=<<___; std $t0,`$FRAME+64`($sp) std $t1,`$FRAME+72`($sp) std $t2,`$FRAME+80`($sp) @@ -559,7 +618,17 @@ L1st: li $i,8 ; i=1 .align 5 Louter: +___ +$code.=<<___ if ($SIZE_T==8); ldx $t3,$bp,$i ; bp[i] +___ +$code.=<<___ if ($SIZE_T==4); + add $t0,$bp,$i + lwz $t3,0($t0) ; bp[i,i+1] + lwz $t0,4($t0) + insrdi $t3,$t0,32,0 +___ +$code.=<<___; ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] mulld $t7,$a0,$t3 ; ap[0]*bp[i] @@ -761,6 +830,13 @@ Linner: stfd $T0b,`$FRAME+8`($sp) add $t7,$t7,$carry addc $t3,$t0,$t1 +___ +$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] + extrdi $t0,$t0,32,0 + extrdi $t1,$t1,32,0 + adde $t0,$t0,$t1 +___ +$code.=<<___; stfd $T1a,`$FRAME+16`($sp) stfd $T1b,`$FRAME+24`($sp) insrdi $t4,$t7,16,0 ; 64..127 bits @@ -768,6 +844,13 @@ Linner: stfd $T2a,`$FRAME+32`($sp) stfd $T2b,`$FRAME+40`($sp) adde $t5,$t4,$t2 +___ +$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] + extrdi $t4,$t4,32,0 + extrdi $t2,$t2,32,0 + adde $t4,$t4,$t2 +___ +$code.=<<___; stfd $T3a,`$FRAME+48`($sp) stfd $T3b,`$FRAME+56`($sp) addze $carry,$carry @@ -816,7 +899,21 @@ Linner: ld $t7,`$FRAME+72`($sp) addc $t3,$t0,$t1 +___ +$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] + extrdi $t0,$t0,32,0 + extrdi $t1,$t1,32,0 + adde $t0,$t0,$t1 +___ +$code.=<<___; adde $t5,$t4,$t2 +___ +$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] + extrdi $t4,$t4,32,0 + extrdi $t2,$t2,32,0 + adde $t4,$t4,$t2 +___ +$code.=<<___; addze $carry,$carry std $t3,-16($tp) ; tp[j-1] @@ -835,7 +932,9 @@ Linner: subf $nap_d,$t7,$nap_d ; rewind pointer cmpw $i,$num blt- Louter +___ +$code.=<<___ if ($SIZE_T==8); subf $np,$num,$np ; rewind np addi $j,$j,1 ; restore counter subfc $i,$i,$i ; j=0 and "clear" XER[CA] @@ -883,34 +982,105 @@ Lcopy: ; copy or in-place refresh stdx $i,$t4,$i addi $i,$i,16 bdnz- Lcopy +___ +$code.=<<___ if ($SIZE_T==4); + subf $np,$num,$np ; rewind np + addi $j,$j,1 ; restore counter + subfc $i,$i,$i ; j=0 and "clear" XER[CA] + addi $tp,$sp,`$FRAME+$TRANSFER` + addi $np,$np,-4 + addi $rp,$rp,-4 + addi $ap,$sp,`$FRAME+$TRANSFER+4` + mtctr $j + +.align 4 +Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order + ldu $t2,16($tp) + lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order + lwz $t5,8($np) + lwz $t6,12($np) + lwzu $t7,16($np) + extrdi $t1,$t0,32,0 + extrdi $t3,$t2,32,0 + subfe $t4,$t4,$t0 ; tp[j]-np[j] + stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order + subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] + stw $t1,8($ap) + subfe $t6,$t6,$t2 ; tp[j+2]-np[j+2] + stw $t2,12($ap) + subfe $t7,$t7,$t3 ; tp[j+3]-np[j+3] + stwu $t3,16($ap) + stw $t4,4($rp) + stw $t5,8($rp) + stw $t6,12($rp) + stwu $t7,16($rp) + bdnz- Lsub + + li $i,0 + subfe $ovf,$i,$ovf ; handle upmost overflow bit + addi $tp,$sp,`$FRAME+$TRANSFER+4` + subf $rp,$num,$rp ; rewind rp + and $ap,$tp,$ovf + andc $np,$rp,$ovf + or $ap,$ap,$np ; ap=borrow?tp:rp + addi $tp,$sp,`$FRAME+$TRANSFER` + mtctr $j + +.align 4 +Lcopy: ; copy or in-place refresh + lwz $t0,4($ap) + lwz $t1,8($ap) + lwz $t2,12($ap) + lwzu $t3,16($ap) + std $i,8($nap_d) ; zap nap_d + std $i,16($nap_d) + std $i,24($nap_d) + std $i,32($nap_d) + std $i,40($nap_d) + std $i,48($nap_d) + std $i,56($nap_d) + stdu $i,64($nap_d) + stw $t0,4($rp) + stw $t1,8($rp) + stw $t2,12($rp) + stwu $t3,16($rp) + std $i,8($tp) ; zap tp at once + stdu $i,16($tp) + bdnz- Lcopy +___ - $POP r14,`2*$SIZE_T`($sp) - $POP r15,`3*$SIZE_T`($sp) - $POP r16,`4*$SIZE_T`($sp) - $POP r17,`5*$SIZE_T`($sp) - $POP r18,`6*$SIZE_T`($sp) - $POP r19,`7*$SIZE_T`($sp) - $POP r20,`8*$SIZE_T`($sp) - $POP r21,`9*$SIZE_T`($sp) - $POP r22,`10*$SIZE_T`($sp) - $POP r23,`11*$SIZE_T`($sp) - lfd f14,`12*$SIZE_T+0`($sp) - lfd f15,`12*$SIZE_T+8`($sp) - lfd f16,`12*$SIZE_T+16`($sp) - lfd f17,`12*$SIZE_T+24`($sp) - lfd f18,`12*$SIZE_T+32`($sp) - lfd f19,`12*$SIZE_T+40`($sp) - lfd f20,`12*$SIZE_T+48`($sp) - lfd f21,`12*$SIZE_T+56`($sp) - lfd f22,`12*$SIZE_T+64`($sp) - lfd f23,`12*$SIZE_T+72`($sp) - lfd f24,`12*$SIZE_T+80`($sp) - lfd f25,`12*$SIZE_T+88`($sp) - $POP $sp,0($sp) +$code.=<<___; + $POP $i,0($sp) li r3,1 ; signal "handled" + $POP r22,`-12*8-10*$SIZE_T`($i) + $POP r23,`-12*8-9*$SIZE_T`($i) + $POP r24,`-12*8-8*$SIZE_T`($i) + $POP r25,`-12*8-7*$SIZE_T`($i) + $POP r26,`-12*8-6*$SIZE_T`($i) + $POP r27,`-12*8-5*$SIZE_T`($i) + $POP r28,`-12*8-4*$SIZE_T`($i) + $POP r29,`-12*8-3*$SIZE_T`($i) + $POP r30,`-12*8-2*$SIZE_T`($i) + $POP r31,`-12*8-1*$SIZE_T`($i) + lfd f20,`-12*8`($i) + lfd f21,`-11*8`($i) + lfd f22,`-10*8`($i) + lfd f23,`-9*8`($i) + lfd f24,`-8*8`($i) + lfd f25,`-7*8`($i) + lfd f26,`-6*8`($i) + lfd f27,`-5*8`($i) + lfd f28,`-4*8`($i) + lfd f29,`-3*8`($i) + lfd f30,`-2*8`($i) + lfd f31,`-1*8`($i) + mr $sp,$i blr .long 0 -.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " + .byte 0,12,4,0,0x8c,10,6,0 + .long 0 + +.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/bn/asm/s390x-gf2m.pl b/app/openssl/crypto/bn/asm/s390x-gf2m.pl new file mode 100644 index 00000000..9d18d40e --- /dev/null +++ b/app/openssl/crypto/bn/asm/s390x-gf2m.pl @@ -0,0 +1,221 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# May 2011 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication used +# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for +# the time being... gcc 4.3 appeared to generate poor code, therefore +# the effort. And indeed, the module delivers 55%-90%(*) improvement +# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit +# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196. +# This is for 64-bit build. In 32-bit "highgprs" case improvement is +# even higher, for example on z990 it was measured 80%-150%. ECDSA +# sign is modest 9%-12% faster. Keep in mind that these coefficients +# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is +# burnt in it... +# +# (*) gcc 4.1 was observed to deliver better results than gcc 4.3, +# so that improvement coefficients can vary from one specific +# setup to another. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$stdframe=16*$SIZE_T+4*8; + +$rp="%r2"; +$a1="%r3"; +$a0="%r4"; +$b1="%r5"; +$b0="%r6"; + +$ra="%r14"; +$sp="%r15"; + +@T=("%r0","%r1"); +@i=("%r12","%r13"); + +($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11)); +($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8; + +$code.=<<___; +.text + +.type _mul_1x1,\@function +.align 16 +_mul_1x1: + lgr $a1,$a + sllg $a2,$a,1 + sllg $a4,$a,2 + sllg $a8,$a,3 + + srag $lo,$a1,63 # broadcast 63rd bit + nihh $a1,0x1fff + srag @i[0],$a2,63 # broadcast 62nd bit + nihh $a2,0x3fff + srag @i[1],$a4,63 # broadcast 61st bit + nihh $a4,0x7fff + ngr $lo,$b + ngr @i[0],$b + ngr @i[1],$b + + lghi @T[0],0 + lgr $a12,$a1 + stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0 + xgr $a12,$a2 + stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1 + lgr $a48,$a4 + stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2 + xgr $a48,$a8 + stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2 + xgr $a1,$a4 + + stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4 + xgr $a2,$a4 + stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4 + xgr $a12,$a4 + stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4 + xgr $a1,$a48 + stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4 + xgr $a2,$a48 + + stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8 + xgr $a12,$a48 + stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8 + xgr $a1,$a4 + stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8 + xgr $a2,$a4 + stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8 + + xgr $a12,$a4 + stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8 + srlg $hi,$lo,1 + stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8 + sllg $lo,$lo,63 + stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8 + srlg @T[0],@i[0],2 + stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8 + + lghi $mask,`0xf<<3` + sllg $a1,@i[0],62 + sllg @i[0],$b,3 + srlg @T[1],@i[1],3 + ngr @i[0],$mask + sllg $a2,@i[1],61 + srlg @i[1],$b,4-3 + xgr $hi,@T[0] + ngr @i[1],$mask + xgr $lo,$a1 + xgr $hi,@T[1] + xgr $lo,$a2 + + xg $lo,$stdframe(@i[0],$sp) + srlg @i[0],$b,8-3 + ngr @i[0],$mask +___ +for($n=1;$n<14;$n++) { +$code.=<<___; + lg @T[1],$stdframe(@i[1],$sp) + srlg @i[1],$b,`($n+2)*4`-3 + sllg @T[0],@T[1],`$n*4` + ngr @i[1],$mask + srlg @T[1],@T[1],`64-$n*4` + xgr $lo,@T[0] + xgr $hi,@T[1] +___ + push(@i,shift(@i)); push(@T,shift(@T)); +} +$code.=<<___; + lg @T[1],$stdframe(@i[1],$sp) + sllg @T[0],@T[1],`$n*4` + srlg @T[1],@T[1],`64-$n*4` + xgr $lo,@T[0] + xgr $hi,@T[1] + + lg @T[0],$stdframe(@i[0],$sp) + sllg @T[1],@T[0],`($n+1)*4` + srlg @T[0],@T[0],`64-($n+1)*4` + xgr $lo,@T[1] + xgr $hi,@T[0] + + br $ra +.size _mul_1x1,.-_mul_1x1 + +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,\@function +.align 16 +bn_GF2m_mul_2x2: + stm${g} %r3,%r15,3*$SIZE_T($sp) + + lghi %r1,-$stdframe-128 + la %r0,0($sp) + la $sp,0(%r1,$sp) # alloca + st${g} %r0,0($sp) # back chain +___ +if ($SIZE_T==8) { +my @r=map("%r$_",(6..9)); +$code.=<<___; + bras $ra,_mul_1x1 # a1·b1 + stmg $lo,$hi,16($rp) + + lg $a,`$stdframe+128+4*$SIZE_T`($sp) + lg $b,`$stdframe+128+6*$SIZE_T`($sp) + bras $ra,_mul_1x1 # a0·b0 + stmg $lo,$hi,0($rp) + + lg $a,`$stdframe+128+3*$SIZE_T`($sp) + lg $b,`$stdframe+128+5*$SIZE_T`($sp) + xg $a,`$stdframe+128+4*$SIZE_T`($sp) + xg $b,`$stdframe+128+6*$SIZE_T`($sp) + bras $ra,_mul_1x1 # (a0+a1)·(b0+b1) + lmg @r[0],@r[3],0($rp) + + xgr $lo,$hi + xgr $hi,@r[1] + xgr $lo,@r[0] + xgr $hi,@r[2] + xgr $lo,@r[3] + xgr $hi,@r[3] + xgr $lo,$hi + stg $hi,16($rp) + stg $lo,8($rp) +___ +} else { +$code.=<<___; + sllg %r3,%r3,32 + sllg %r5,%r5,32 + or %r3,%r4 + or %r5,%r6 + bras $ra,_mul_1x1 + rllg $lo,$lo,32 + rllg $hi,$hi,32 + stmg $lo,$hi,0($rp) +___ +} +$code.=<<___; + lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp) + br $ra +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by " +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/s390x-mont.pl b/app/openssl/crypto/bn/asm/s390x-mont.pl index f61246f5..9fd64e81 100644 --- a/app/openssl/crypto/bn/asm/s390x-mont.pl +++ b/app/openssl/crypto/bn/asm/s390x-mont.pl @@ -32,6 +32,33 @@ # Reschedule to minimize/avoid Address Generation Interlock hazard, # make inner loops counter-based. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG +# is achieved by swapping words after 64-bit loads, follow _dswap-s. +# On z990 it was measured to perform 2.6-2.2 times better than +# compiler-generated code, less for longer keys... + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$stdframe=16*$SIZE_T+4*8; + $mn0="%r0"; $num="%r1"; @@ -60,34 +87,44 @@ $code.=<<___; .globl bn_mul_mont .type bn_mul_mont,\@function bn_mul_mont: - lgf $num,164($sp) # pull $num - sla $num,3 # $num to enumerate bytes + lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num + sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes la $bp,0($num,$bp) - stg %r2,16($sp) + st${g} %r2,2*$SIZE_T($sp) cghi $num,16 # lghi %r2,0 # blr %r14 # if($num<16) return 0; +___ +$code.=<<___ if ($flavour =~ /3[12]/); + tmll $num,4 + bnzr %r14 # if ($num&1) return 0; +___ +$code.=<<___ if ($flavour !~ /3[12]/); cghi $num,96 # bhr %r14 # if($num>96) return 0; +___ +$code.=<<___; + stm${g} %r3,%r15,3*$SIZE_T($sp) - stmg %r3,%r15,24($sp) - - lghi $rp,-160-8 # leave room for carry bit + lghi $rp,-$stdframe-8 # leave room for carry bit lcgr $j,$num # -$num lgr %r0,$sp la $rp,0($rp,$sp) la $sp,0($j,$rp) # alloca - stg %r0,0($sp) # back chain + st${g} %r0,0($sp) # back chain sra $num,3 # restore $num la $bp,0($j,$bp) # restore $bp ahi $num,-1 # adjust $num for inner loop lg $n0,0($n0) # pull n0 + _dswap $n0 lg $bi,0($bp) + _dswap $bi lg $alo,0($ap) + _dswap $alo mlgr $ahi,$bi # ap[0]*bp[0] lgr $AHI,$ahi @@ -95,6 +132,7 @@ bn_mul_mont: msgr $mn0,$n0 lg $nlo,0($np) # + _dswap $nlo mlgr $nhi,$mn0 # np[0]*m1 algr $nlo,$alo # +="tp[0]" lghi $NHI,0 @@ -106,12 +144,14 @@ bn_mul_mont: .align 16 .L1st: lg $alo,0($j,$ap) + _dswap $alo mlgr $ahi,$bi # ap[j]*bp[0] algr $alo,$AHI lghi $AHI,0 alcgr $AHI,$ahi lg $nlo,0($j,$np) + _dswap $nlo mlgr $nhi,$mn0 # np[j]*m1 algr $nlo,$NHI lghi $NHI,0 @@ -119,22 +159,24 @@ bn_mul_mont: algr $nlo,$alo alcgr $NHI,$nhi - stg $nlo,160-8($j,$sp) # tp[j-1]= + stg $nlo,$stdframe-8($j,$sp) # tp[j-1]= la $j,8($j) # j++ brct $count,.L1st algr $NHI,$AHI lghi $AHI,0 alcgr $AHI,$AHI # upmost overflow bit - stg $NHI,160-8($j,$sp) - stg $AHI,160($j,$sp) + stg $NHI,$stdframe-8($j,$sp) + stg $AHI,$stdframe($j,$sp) la $bp,8($bp) # bp++ .Louter: lg $bi,0($bp) # bp[i] + _dswap $bi lg $alo,0($ap) + _dswap $alo mlgr $ahi,$bi # ap[0]*bp[i] - alg $alo,160($sp) # +=tp[0] + alg $alo,$stdframe($sp) # +=tp[0] lghi $AHI,0 alcgr $AHI,$ahi @@ -142,6 +184,7 @@ bn_mul_mont: msgr $mn0,$n0 # tp[0]*n0 lg $nlo,0($np) # np[0] + _dswap $nlo mlgr $nhi,$mn0 # np[0]*m1 algr $nlo,$alo # +="tp[0]" lghi $NHI,0 @@ -153,14 +196,16 @@ bn_mul_mont: .align 16 .Linner: lg $alo,0($j,$ap) + _dswap $alo mlgr $ahi,$bi # ap[j]*bp[i] algr $alo,$AHI lghi $AHI,0 alcgr $ahi,$AHI - alg $alo,160($j,$sp)# +=tp[j] + alg $alo,$stdframe($j,$sp)# +=tp[j] alcgr $AHI,$ahi lg $nlo,0($j,$np) + _dswap $nlo mlgr $nhi,$mn0 # np[j]*m1 algr $nlo,$NHI lghi $NHI,0 @@ -168,31 +213,33 @@ bn_mul_mont: algr $nlo,$alo # +="tp[j]" alcgr $NHI,$nhi - stg $nlo,160-8($j,$sp) # tp[j-1]= + stg $nlo,$stdframe-8($j,$sp) # tp[j-1]= la $j,8($j) # j++ brct $count,.Linner algr $NHI,$AHI lghi $AHI,0 alcgr $AHI,$AHI - alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit + alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit lghi $ahi,0 alcgr $AHI,$ahi # new upmost overflow bit - stg $NHI,160-8($j,$sp) - stg $AHI,160($j,$sp) + stg $NHI,$stdframe-8($j,$sp) + stg $AHI,$stdframe($j,$sp) la $bp,8($bp) # bp++ - clg $bp,160+8+32($j,$sp) # compare to &bp[num] + cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num] jne .Louter - lg $rp,160+8+16($j,$sp) # reincarnate rp - la $ap,160($sp) + l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp + la $ap,$stdframe($sp) ahi $num,1 # restore $num, incidentally clears "borrow" la $j,0(%r0) lr $count,$num .Lsub: lg $alo,0($j,$ap) - slbg $alo,0($j,$np) + lg $nlo,0($j,$np) + _dswap $nlo + slbgr $alo,$nlo stg $alo,0($j,$rp) la $j,8($j) brct $count,.Lsub @@ -207,19 +254,24 @@ bn_mul_mont: la $j,0(%r0) lgr $count,$num -.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh - stg $j,160($j,$sp) # zap tp +.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh + _dswap $alo + stg $j,$stdframe($j,$sp) # zap tp stg $alo,0($j,$rp) la $j,8($j) brct $count,.Lcopy - la %r1,160+8+48($j,$sp) - lmg %r6,%r15,0(%r1) + la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp) + lm${g} %r6,%r15,0(%r1) lghi %r2,1 # signal "processed" br %r14 .size bn_mul_mont,.-bn_mul_mont .string "Montgomery Multiplication for s390x, CRYPTOGAMS by " ___ -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e; + print $_,"\n"; +} close STDOUT; diff --git a/app/openssl/crypto/bn/asm/x86-gf2m.S b/app/openssl/crypto/bn/asm/x86-gf2m.S new file mode 100644 index 00000000..9ed29ae0 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86-gf2m.S @@ -0,0 +1,347 @@ +.file "crypto/bn/asm/x86-gf2m.s" +.text +.type _mul_1x1_mmx,@function +.align 16 +_mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl %ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.size _mul_1x1_mmx,.-_mul_1x1_mmx +.type _mul_1x1_ialu,@function +.align 16 +_mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.size _mul_1x1_ialu,.-_mul_1x1_ialu +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: +.L_bn_GF2m_mul_2x2_begin: + call .L000PIC_me_up +.L000PIC_me_up: + popl %edx + leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx + movl OPENSSL_ia32cap_P@GOT(%edx),%edx + movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz .L001ialu + testl $16777216,%eax + jz .L002mmx + testl $2,%edx + jz .L002mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 16 +.L002mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm7 + movl 28(%esp),%eax + movl 36(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call _mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 16 +.L001ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call _mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 56(%esp),%ebx + call _mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call _mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/bn/asm/x86-gf2m.pl b/app/openssl/crypto/bn/asm/x86-gf2m.pl new file mode 100644 index 00000000..b5795302 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86-gf2m.pl @@ -0,0 +1,313 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# May 2011 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication used +# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for +# the time being... Except that it has three code paths: pure integer +# code suitable for any x86 CPU, MMX code suitable for PIII and later +# and PCLMULQDQ suitable for Westmere and later. Improvement varies +# from one benchmark and µ-arch to another. Below are interval values +# for 163- and 571-bit ECDH benchmarks relative to compiler-generated +# code: +# +# PIII 16%-30% +# P4 12%-12% +# Opteron 18%-40% +# Core2 19%-44% +# Atom 38%-64% +# Westmere 53%-121%(PCLMULQDQ)/20%-32%(MMX) +# Sandy Bridge 72%-127%(PCLMULQDQ)/27%-23%(MMX) +# +# Note that above improvement coefficients are not coefficients for +# bn_GF2m_mul_2x2 itself. For example 120% ECDH improvement is result +# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, benchmark +# is more and more dominated by other subroutines, most notably by +# BN_GF2m_mod[_mul]_arr... + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386"); + +$sse2=0; +for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } + +&external_label("OPENSSL_ia32cap_P") if ($sse2); + +$a="eax"; +$b="ebx"; +($a1,$a2,$a4)=("ecx","edx","ebp"); + +$R="mm0"; +@T=("mm1","mm2"); +($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5"); +@i=("esi","edi"); + + if (!$x86only) { +&function_begin_B("_mul_1x1_mmx"); + &sub ("esp",32+4); + &mov ($a1,$a); + &lea ($a2,&DWP(0,$a,$a)); + &and ($a1,0x3fffffff); + &lea ($a4,&DWP(0,$a2,$a2)); + &mov (&DWP(0*4,"esp"),0); + &and ($a2,0x7fffffff); + &movd ($A,$a); + &movd ($B,$b); + &mov (&DWP(1*4,"esp"),$a1); # a1 + &xor ($a1,$a2); # a1^a2 + &pxor ($B31,$B31); + &pxor ($B30,$B30); + &mov (&DWP(2*4,"esp"),$a2); # a2 + &xor ($a2,$a4); # a2^a4 + &mov (&DWP(3*4,"esp"),$a1); # a1^a2 + &pcmpgtd($B31,$A); # broadcast 31st bit + &paddd ($A,$A); # $A<<=1 + &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4 + &mov (&DWP(4*4,"esp"),$a4); # a4 + &xor ($a4,$a2); # a2=a4^a2^a4 + &pand ($B31,$B); + &pcmpgtd($B30,$A); # broadcast 30th bit + &mov (&DWP(5*4,"esp"),$a1); # a1^a4 + &xor ($a4,$a1); # a1^a2^a4 + &psllq ($B31,31); + &pand ($B30,$B); + &mov (&DWP(6*4,"esp"),$a2); # a2^a4 + &mov (@i[0],0x7); + &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4 + &mov ($a4,@i[0]); + &and (@i[0],$b); + &shr ($b,3); + &mov (@i[1],$a4); + &psllq ($B30,30); + &and (@i[1],$b); + &shr ($b,3); + &movd ($R,&DWP(0,"esp",@i[0],4)); + &mov (@i[0],$a4); + &and (@i[0],$b); + &shr ($b,3); + for($n=1;$n<9;$n++) { + &movd (@T[1],&DWP(0,"esp",@i[1],4)); + &mov (@i[1],$a4); + &psllq (@T[1],3*$n); + &and (@i[1],$b); + &shr ($b,3); + &pxor ($R,@T[1]); + + push(@i,shift(@i)); push(@T,shift(@T)); + } + &movd (@T[1],&DWP(0,"esp",@i[1],4)); + &pxor ($R,$B30); + &psllq (@T[1],3*$n++); + &pxor ($R,@T[1]); + + &movd (@T[0],&DWP(0,"esp",@i[0],4)); + &pxor ($R,$B31); + &psllq (@T[0],3*$n); + &add ("esp",32+4); + &pxor ($R,@T[0]); + &ret (); +&function_end_B("_mul_1x1_mmx"); + } + +($lo,$hi)=("eax","edx"); +@T=("ecx","ebp"); + +&function_begin_B("_mul_1x1_ialu"); + &sub ("esp",32+4); + &mov ($a1,$a); + &lea ($a2,&DWP(0,$a,$a)); + &lea ($a4,&DWP(0,"",$a,4)); + &and ($a1,0x3fffffff); + &lea (@i[1],&DWP(0,$lo,$lo)); + &sar ($lo,31); # broadcast 31st bit + &mov (&DWP(0*4,"esp"),0); + &and ($a2,0x7fffffff); + &mov (&DWP(1*4,"esp"),$a1); # a1 + &xor ($a1,$a2); # a1^a2 + &mov (&DWP(2*4,"esp"),$a2); # a2 + &xor ($a2,$a4); # a2^a4 + &mov (&DWP(3*4,"esp"),$a1); # a1^a2 + &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4 + &mov (&DWP(4*4,"esp"),$a4); # a4 + &xor ($a4,$a2); # a2=a4^a2^a4 + &mov (&DWP(5*4,"esp"),$a1); # a1^a4 + &xor ($a4,$a1); # a1^a2^a4 + &sar (@i[1],31); # broardcast 30th bit + &and ($lo,$b); + &mov (&DWP(6*4,"esp"),$a2); # a2^a4 + &and (@i[1],$b); + &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4 + &mov ($hi,$lo); + &shl ($lo,31); + &mov (@T[0],@i[1]); + &shr ($hi,1); + + &mov (@i[0],0x7); + &shl (@i[1],30); + &and (@i[0],$b); + &shr (@T[0],2); + &xor ($lo,@i[1]); + + &shr ($b,3); + &mov (@i[1],0x7); # 5-byte instruction!? + &and (@i[1],$b); + &shr ($b,3); + &xor ($hi,@T[0]); + &xor ($lo,&DWP(0,"esp",@i[0],4)); + &mov (@i[0],0x7); + &and (@i[0],$b); + &shr ($b,3); + for($n=1;$n<9;$n++) { + &mov (@T[1],&DWP(0,"esp",@i[1],4)); + &mov (@i[1],0x7); + &mov (@T[0],@T[1]); + &shl (@T[1],3*$n); + &and (@i[1],$b); + &shr (@T[0],32-3*$n); + &xor ($lo,@T[1]); + &shr ($b,3); + &xor ($hi,@T[0]); + + push(@i,shift(@i)); push(@T,shift(@T)); + } + &mov (@T[1],&DWP(0,"esp",@i[1],4)); + &mov (@T[0],@T[1]); + &shl (@T[1],3*$n); + &mov (@i[1],&DWP(0,"esp",@i[0],4)); + &shr (@T[0],32-3*$n); $n++; + &mov (@i[0],@i[1]); + &xor ($lo,@T[1]); + &shl (@i[1],3*$n); + &xor ($hi,@T[0]); + &shr (@i[0],32-3*$n); + &xor ($lo,@i[1]); + &xor ($hi,@i[0]); + + &add ("esp",32+4); + &ret (); +&function_end_B("_mul_1x1_ialu"); + +# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); +&function_begin_B("bn_GF2m_mul_2x2"); +if (!$x86only) { + &picmeup("edx","OPENSSL_ia32cap_P"); + &mov ("eax",&DWP(0,"edx")); + &mov ("edx",&DWP(4,"edx")); + &test ("eax",1<<23); # check MMX bit + &jz (&label("ialu")); +if ($sse2) { + &test ("eax",1<<24); # check FXSR bit + &jz (&label("mmx")); + &test ("edx",1<<1); # check PCLMULQDQ bit + &jz (&label("mmx")); + + &movups ("xmm0",&QWP(8,"esp")); + &shufps ("xmm0","xmm0",0b10110001); + &pclmulqdq ("xmm0","xmm0",1); + &mov ("eax",&DWP(4,"esp")); + &movups (&QWP(0,"eax"),"xmm0"); + &ret (); + +&set_label("mmx",16); +} + &push ("ebp"); + &push ("ebx"); + &push ("esi"); + &push ("edi"); + &mov ($a,&wparam(1)); + &mov ($b,&wparam(3)); + &call ("_mul_1x1_mmx"); # a1·b1 + &movq ("mm7",$R); + + &mov ($a,&wparam(2)); + &mov ($b,&wparam(4)); + &call ("_mul_1x1_mmx"); # a0·b0 + &movq ("mm6",$R); + + &mov ($a,&wparam(1)); + &mov ($b,&wparam(3)); + &xor ($a,&wparam(2)); + &xor ($b,&wparam(4)); + &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1) + &pxor ($R,"mm7"); + &mov ($a,&wparam(0)); + &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0 + + &movq ($A,$R); + &psllq ($R,32); + &pop ("edi"); + &psrlq ($A,32); + &pop ("esi"); + &pxor ($R,"mm6"); + &pop ("ebx"); + &pxor ($A,"mm7"); + &movq (&QWP(0,$a),$R); + &pop ("ebp"); + &movq (&QWP(8,$a),$A); + &emms (); + &ret (); +&set_label("ialu",16); +} + &push ("ebp"); + &push ("ebx"); + &push ("esi"); + &push ("edi"); + &stack_push(4+1); + + &mov ($a,&wparam(1)); + &mov ($b,&wparam(3)); + &call ("_mul_1x1_ialu"); # a1·b1 + &mov (&DWP(8,"esp"),$lo); + &mov (&DWP(12,"esp"),$hi); + + &mov ($a,&wparam(2)); + &mov ($b,&wparam(4)); + &call ("_mul_1x1_ialu"); # a0·b0 + &mov (&DWP(0,"esp"),$lo); + &mov (&DWP(4,"esp"),$hi); + + &mov ($a,&wparam(1)); + &mov ($b,&wparam(3)); + &xor ($a,&wparam(2)); + &xor ($b,&wparam(4)); + &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1) + + &mov ("ebp",&wparam(0)); + @r=("ebx","ecx","edi","esi"); + &mov (@r[0],&DWP(0,"esp")); + &mov (@r[1],&DWP(4,"esp")); + &mov (@r[2],&DWP(8,"esp")); + &mov (@r[3],&DWP(12,"esp")); + + &xor ($lo,$hi); + &xor ($hi,@r[1]); + &xor ($lo,@r[0]); + &mov (&DWP(0,"ebp"),@r[0]); + &xor ($hi,@r[2]); + &mov (&DWP(12,"ebp"),@r[3]); + &xor ($lo,@r[3]); + &stack_pop(4+1); + &xor ($hi,@r[3]); + &pop ("edi"); + &xor ($lo,$hi); + &pop ("esi"); + &mov (&DWP(8,"ebp"),$hi); + &pop ("ebx"); + &mov (&DWP(4,"ebp"),$lo); + &pop ("ebp"); + &ret (); +&function_end_B("bn_GF2m_mul_2x2"); + +&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by "); + +&asm_finish(); diff --git a/app/openssl/crypto/bn/asm/x86-mont.S b/app/openssl/crypto/bn/asm/x86-mont.S new file mode 100644 index 00000000..c701e9e3 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86-mont.S @@ -0,0 +1,460 @@ +.file "crypto/bn/asm/x86-mont.s" +.text +.globl bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: +.L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl .L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + movl %esp,%ebp + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%esp + negl %edi + movl %esp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%esp + xorl %esp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%esp + andl $-64,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %ebp,24(%esp) + call .L001PIC_me_up +.L001PIC_me_up: + popl %eax + leal _GLOBAL_OFFSET_TABLE_+[.-.L001PIC_me_up](%eax),%eax + movl OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc .L002non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 16 +.L0031st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl .L0031st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +.L004outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +.L005inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz .L005inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle .L004outer + emms + jmp .L006common_tail +.align 16 +.L002non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz .L007bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 16 +.L008mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L008mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp .L0092ndmadd +.align 16 +.L0101stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L0101stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 16 +.L0092ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0092ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je .L006common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp .L0101stmadd +.align 16 +.L007bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 16 +.L011sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl .L011sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 16 +.L0123rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0123rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je .L006common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je .L013sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 16 +.L014sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle .L014sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +.L013sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp .L0123rdmadd +.align 16 +.L006common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 16 +.L015sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge .L015sub + sbbl $0,%eax + andl %eax,%esi + notl %eax + movl %edi,%ebp + andl %eax,%ebp + orl %ebp,%esi +.align 16 +.L016copy: + movl (%esi,%ebx,4),%eax + movl %eax,(%edi,%ebx,4) + movl %ecx,32(%esp,%ebx,4) + decl %ebx + jge .L016copy + movl 24(%esp),%esp + movl $1,%eax +.L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_mont,.-.L_bn_mul_mont_begin +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/bn/asm/x86-mont.pl b/app/openssl/crypto/bn/asm/x86-mont.pl index 5cd3cd2e..e8f6b050 100755 --- a/app/openssl/crypto/bn/asm/x86-mont.pl +++ b/app/openssl/crypto/bn/asm/x86-mont.pl @@ -527,8 +527,10 @@ $sbit=$num; &jle (&label("sqradd")); &mov ($carry,"edx"); - &lea ("edx",&DWP(0,$sbit,"edx",2)); + &add ("edx","edx"); &shr ($carry,31); + &add ("edx",$sbit); + &adc ($carry,0); &set_label("sqrlast"); &mov ($word,$_n0); &mov ($inp,$_np); diff --git a/app/openssl/crypto/bn/asm/x86_64-gcc.c b/app/openssl/crypto/bn/asm/x86_64-gcc.c index acb0b401..329946e5 100644 --- a/app/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/app/openssl/crypto/bn/asm/x86_64-gcc.c @@ -66,7 +66,7 @@ #undef sqr /* - * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; + * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; * "g"(0) let the compiler to decide where does it * want to keep the value of zero; */ diff --git a/app/openssl/crypto/bn/asm/x86_64-gf2m.S b/app/openssl/crypto/bn/asm/x86_64-gf2m.S new file mode 100644 index 00000000..ccd2ed70 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86_64-gf2m.S @@ -0,0 +1,291 @@ +.text + +.type _mul_1x1,@function +.align 16 +_mul_1x1: + subq $128+8,%rsp + movq $-1,%r9 + leaq (%rax,%rax,1),%rsi + shrq $3,%r9 + leaq (,%rax,4),%rdi + andq %rax,%r9 + leaq (,%rax,8),%r12 + sarq $63,%rax + leaq (%r9,%r9,1),%r10 + sarq $63,%rsi + leaq (,%r9,4),%r11 + andq %rbp,%rax + sarq $63,%rdi + movq %rax,%rdx + shlq $63,%rax + andq %rbp,%rsi + shrq $1,%rdx + movq %rsi,%rcx + shlq $62,%rsi + andq %rbp,%rdi + shrq $2,%rcx + xorq %rsi,%rax + movq %rdi,%rbx + shlq $61,%rdi + xorq %rcx,%rdx + shrq $3,%rbx + xorq %rdi,%rax + xorq %rbx,%rdx + + movq %r9,%r13 + movq $0,0(%rsp) + xorq %r10,%r13 + movq %r9,8(%rsp) + movq %r11,%r14 + movq %r10,16(%rsp) + xorq %r12,%r14 + movq %r13,24(%rsp) + + xorq %r11,%r9 + movq %r11,32(%rsp) + xorq %r11,%r10 + movq %r9,40(%rsp) + xorq %r11,%r13 + movq %r10,48(%rsp) + xorq %r14,%r9 + movq %r13,56(%rsp) + xorq %r14,%r10 + + movq %r12,64(%rsp) + xorq %r14,%r13 + movq %r9,72(%rsp) + xorq %r11,%r9 + movq %r10,80(%rsp) + xorq %r11,%r10 + movq %r13,88(%rsp) + + xorq %r11,%r13 + movq %r14,96(%rsp) + movq %r8,%rsi + movq %r9,104(%rsp) + andq %rbp,%rsi + movq %r10,112(%rsp) + shrq $4,%rbp + movq %r13,120(%rsp) + movq %r8,%rdi + andq %rbp,%rdi + shrq $4,%rbp + + movq (%rsp,%rsi,8),%xmm0 + movq %r8,%rsi + andq %rbp,%rsi + shrq $4,%rbp + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $4,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $60,%rbx + xorq %rcx,%rax + pslldq $1,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $12,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $52,%rbx + xorq %rcx,%rax + pslldq $2,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $20,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $44,%rbx + xorq %rcx,%rax + pslldq $3,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $28,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $36,%rbx + xorq %rcx,%rax + pslldq $4,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $36,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $28,%rbx + xorq %rcx,%rax + pslldq $5,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $44,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $20,%rbx + xorq %rcx,%rax + pslldq $6,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $52,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $12,%rbx + xorq %rcx,%rax + pslldq $7,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %rcx,%rbx + shlq $60,%rcx +.byte 102,72,15,126,198 + shrq $4,%rbx + xorq %rcx,%rax + psrldq $8,%xmm0 + xorq %rbx,%rdx +.byte 102,72,15,126,199 + xorq %rsi,%rax + xorq %rdi,%rdx + + addq $128+8,%rsp + .byte 0xf3,0xc3 +.Lend_mul_1x1: +.size _mul_1x1,.-_mul_1x1 + +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: + movq OPENSSL_ia32cap_P(%rip),%rax + btq $33,%rax + jnc .Lvanilla_mul_2x2 + +.byte 102,72,15,110,198 +.byte 102,72,15,110,201 +.byte 102,72,15,110,210 +.byte 102,73,15,110,216 + movdqa %xmm0,%xmm4 + movdqa %xmm1,%xmm5 +.byte 102,15,58,68,193,0 + pxor %xmm2,%xmm4 + pxor %xmm3,%xmm5 +.byte 102,15,58,68,211,0 +.byte 102,15,58,68,229,0 + xorps %xmm0,%xmm4 + xorps %xmm2,%xmm4 + movdqa %xmm4,%xmm5 + pslldq $8,%xmm4 + psrldq $8,%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm0 + movdqu %xmm2,0(%rdi) + movdqu %xmm0,16(%rdi) + .byte 0xf3,0xc3 + +.align 16 +.Lvanilla_mul_2x2: + leaq -136(%rsp),%rsp + movq %r14,80(%rsp) + movq %r13,88(%rsp) + movq %r12,96(%rsp) + movq %rbp,104(%rsp) + movq %rbx,112(%rsp) +.Lbody_mul_2x2: + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + movq %rdx,48(%rsp) + movq %rcx,56(%rsp) + movq %r8,64(%rsp) + + movq $15,%r8 + movq %rsi,%rax + movq %rcx,%rbp + call _mul_1x1 + movq %rax,16(%rsp) + movq %rdx,24(%rsp) + + movq 48(%rsp),%rax + movq 64(%rsp),%rbp + call _mul_1x1 + movq %rax,0(%rsp) + movq %rdx,8(%rsp) + + movq 40(%rsp),%rax + movq 56(%rsp),%rbp + xorq 48(%rsp),%rax + xorq 64(%rsp),%rbp + call _mul_1x1 + movq 0(%rsp),%rbx + movq 8(%rsp),%rcx + movq 16(%rsp),%rdi + movq 24(%rsp),%rsi + movq 32(%rsp),%rbp + + xorq %rdx,%rax + xorq %rcx,%rdx + xorq %rbx,%rax + movq %rbx,0(%rbp) + xorq %rdi,%rdx + movq %rsi,24(%rbp) + xorq %rsi,%rax + xorq %rsi,%rdx + xorq %rdx,%rax + movq %rdx,16(%rbp) + movq %rax,8(%rbp) + + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbp + movq 112(%rsp),%rbx + leaq 136(%rsp),%rsp + .byte 0xf3,0xc3 +.Lend_mul_2x2: +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/app/openssl/crypto/bn/asm/x86_64-gf2m.pl b/app/openssl/crypto/bn/asm/x86_64-gf2m.pl new file mode 100644 index 00000000..42bbec2f --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86_64-gf2m.pl @@ -0,0 +1,390 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# May 2011 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication used +# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for +# the time being... Except that it has two code paths: code suitable +# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and +# later. Improvement varies from one benchmark and µ-arch to another. +# Vanilla code path is at most 20% faster than compiler-generated code +# [not very impressive], while PCLMULQDQ - whole 85%-160% better on +# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that +# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not +# all CPU time is burnt in it... + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +($lo,$hi)=("%rax","%rdx"); $a=$lo; +($i0,$i1)=("%rsi","%rdi"); +($t0,$t1)=("%rbx","%rcx"); +($b,$mask)=("%rbp","%r8"); +($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15)); +($R,$Tx)=("%xmm0","%xmm1"); + +$code.=<<___; +.text + +.type _mul_1x1,\@abi-omnipotent +.align 16 +_mul_1x1: + sub \$128+8,%rsp + mov \$-1,$a1 + lea ($a,$a),$i0 + shr \$3,$a1 + lea (,$a,4),$i1 + and $a,$a1 # a1=a&0x1fffffffffffffff + lea (,$a,8),$a8 + sar \$63,$a # broadcast 63rd bit + lea ($a1,$a1),$a2 + sar \$63,$i0 # broadcast 62nd bit + lea (,$a1,4),$a4 + and $b,$a + sar \$63,$i1 # boardcast 61st bit + mov $a,$hi # $a is $lo + shl \$63,$lo + and $b,$i0 + shr \$1,$hi + mov $i0,$t1 + shl \$62,$i0 + and $b,$i1 + shr \$2,$t1 + xor $i0,$lo + mov $i1,$t0 + shl \$61,$i1 + xor $t1,$hi + shr \$3,$t0 + xor $i1,$lo + xor $t0,$hi + + mov $a1,$a12 + movq \$0,0(%rsp) # tab[0]=0 + xor $a2,$a12 # a1^a2 + mov $a1,8(%rsp) # tab[1]=a1 + mov $a4,$a48 + mov $a2,16(%rsp) # tab[2]=a2 + xor $a8,$a48 # a4^a8 + mov $a12,24(%rsp) # tab[3]=a1^a2 + + xor $a4,$a1 + mov $a4,32(%rsp) # tab[4]=a4 + xor $a4,$a2 + mov $a1,40(%rsp) # tab[5]=a1^a4 + xor $a4,$a12 + mov $a2,48(%rsp) # tab[6]=a2^a4 + xor $a48,$a1 # a1^a4^a4^a8=a1^a8 + mov $a12,56(%rsp) # tab[7]=a1^a2^a4 + xor $a48,$a2 # a2^a4^a4^a8=a1^a8 + + mov $a8,64(%rsp) # tab[8]=a8 + xor $a48,$a12 # a1^a2^a4^a4^a8=a1^a2^a8 + mov $a1,72(%rsp) # tab[9]=a1^a8 + xor $a4,$a1 # a1^a8^a4 + mov $a2,80(%rsp) # tab[10]=a2^a8 + xor $a4,$a2 # a2^a8^a4 + mov $a12,88(%rsp) # tab[11]=a1^a2^a8 + + xor $a4,$a12 # a1^a2^a8^a4 + mov $a48,96(%rsp) # tab[12]=a4^a8 + mov $mask,$i0 + mov $a1,104(%rsp) # tab[13]=a1^a4^a8 + and $b,$i0 + mov $a2,112(%rsp) # tab[14]=a2^a4^a8 + shr \$4,$b + mov $a12,120(%rsp) # tab[15]=a1^a2^a4^a8 + mov $mask,$i1 + and $b,$i1 + shr \$4,$b + + movq (%rsp,$i0,8),$R # half of calculations is done in SSE2 + mov $mask,$i0 + and $b,$i0 + shr \$4,$b +___ + for ($n=1;$n<8;$n++) { + $code.=<<___; + mov (%rsp,$i1,8),$t1 + mov $mask,$i1 + mov $t1,$t0 + shl \$`8*$n-4`,$t1 + and $b,$i1 + movq (%rsp,$i0,8),$Tx + shr \$`64-(8*$n-4)`,$t0 + xor $t1,$lo + pslldq \$$n,$Tx + mov $mask,$i0 + shr \$4,$b + xor $t0,$hi + and $b,$i0 + shr \$4,$b + pxor $Tx,$R +___ + } +$code.=<<___; + mov (%rsp,$i1,8),$t1 + mov $t1,$t0 + shl \$`8*$n-4`,$t1 + movq $R,$i0 + shr \$`64-(8*$n-4)`,$t0 + xor $t1,$lo + psrldq \$8,$R + xor $t0,$hi + movq $R,$i1 + xor $i0,$lo + xor $i1,$hi + + add \$128+8,%rsp + ret +.Lend_mul_1x1: +.size _mul_1x1,.-_mul_1x1 +___ + +($rp,$a1,$a0,$b1,$b0) = $win64? ("%rcx","%rdx","%r8", "%r9","%r10") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx","%r8"); # Unix order + +$code.=<<___; +.extern OPENSSL_ia32cap_P +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,\@abi-omnipotent +.align 16 +bn_GF2m_mul_2x2: + mov OPENSSL_ia32cap_P(%rip),%rax + bt \$33,%rax + jnc .Lvanilla_mul_2x2 + + movq $a1,%xmm0 + movq $b1,%xmm1 + movq $a0,%xmm2 +___ +$code.=<<___ if ($win64); + movq 40(%rsp),%xmm3 +___ +$code.=<<___ if (!$win64); + movq $b0,%xmm3 +___ +$code.=<<___; + movdqa %xmm0,%xmm4 + movdqa %xmm1,%xmm5 + pclmulqdq \$0,%xmm1,%xmm0 # a1·b1 + pxor %xmm2,%xmm4 + pxor %xmm3,%xmm5 + pclmulqdq \$0,%xmm3,%xmm2 # a0·b0 + pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1) + xorps %xmm0,%xmm4 + xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1 + movdqa %xmm4,%xmm5 + pslldq \$8,%xmm4 + psrldq \$8,%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm0 + movdqu %xmm2,0($rp) + movdqu %xmm0,16($rp) + ret + +.align 16 +.Lvanilla_mul_2x2: + lea -8*17(%rsp),%rsp +___ +$code.=<<___ if ($win64); + mov `8*17+40`(%rsp),$b0 + mov %rdi,8*15(%rsp) + mov %rsi,8*16(%rsp) +___ +$code.=<<___; + mov %r14,8*10(%rsp) + mov %r13,8*11(%rsp) + mov %r12,8*12(%rsp) + mov %rbp,8*13(%rsp) + mov %rbx,8*14(%rsp) +.Lbody_mul_2x2: + mov $rp,32(%rsp) # save the arguments + mov $a1,40(%rsp) + mov $a0,48(%rsp) + mov $b1,56(%rsp) + mov $b0,64(%rsp) + + mov \$0xf,$mask + mov $a1,$a + mov $b1,$b + call _mul_1x1 # a1·b1 + mov $lo,16(%rsp) + mov $hi,24(%rsp) + + mov 48(%rsp),$a + mov 64(%rsp),$b + call _mul_1x1 # a0·b0 + mov $lo,0(%rsp) + mov $hi,8(%rsp) + + mov 40(%rsp),$a + mov 56(%rsp),$b + xor 48(%rsp),$a + xor 64(%rsp),$b + call _mul_1x1 # (a0+a1)·(b0+b1) +___ + @r=("%rbx","%rcx","%rdi","%rsi"); +$code.=<<___; + mov 0(%rsp),@r[0] + mov 8(%rsp),@r[1] + mov 16(%rsp),@r[2] + mov 24(%rsp),@r[3] + mov 32(%rsp),%rbp + + xor $hi,$lo + xor @r[1],$hi + xor @r[0],$lo + mov @r[0],0(%rbp) + xor @r[2],$hi + mov @r[3],24(%rbp) + xor @r[3],$lo + xor @r[3],$hi + xor $hi,$lo + mov $hi,16(%rbp) + mov $lo,8(%rbp) + + mov 8*10(%rsp),%r14 + mov 8*11(%rsp),%r13 + mov 8*12(%rsp),%r12 + mov 8*13(%rsp),%rbp + mov 8*14(%rsp),%rbx +___ +$code.=<<___ if ($win64); + mov 8*15(%rsp),%rdi + mov 8*16(%rsp),%rsi +___ +$code.=<<___; + lea 8*17(%rsp),%rsp + ret +.Lend_mul_2x2: +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by " +.align 16 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind + +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 152($context),%rax # pull context->Rsp + mov 248($context),%rbx # pull context->Rip + + lea .Lbody_mul_2x2(%rip),%r10 + cmp %r10,%rbx # context->Rip<"prologue" label + jb .Lin_prologue + + mov 8*10(%rax),%r14 # mimic epilogue + mov 8*11(%rax),%r13 + mov 8*12(%rax),%r12 + mov 8*13(%rax),%rbp + mov 8*14(%rax),%rbx + mov 8*15(%rax),%rdi + mov 8*16(%rax),%rsi + + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + +.Lin_prologue: + lea 8*17(%rax),%rax + mov %rax,152($context) # restore context->Rsp + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva _mul_1x1 + .rva .Lend_mul_1x1 + .rva .LSEH_info_1x1 + + .rva .Lvanilla_mul_2x2 + .rva .Lend_mul_2x2 + .rva .LSEH_info_2x2 +.section .xdata +.align 8 +.LSEH_info_1x1: + .byte 0x01,0x07,0x02,0x00 + .byte 0x07,0x01,0x11,0x00 # sub rsp,128+8 +.LSEH_info_2x2: + .byte 9,0,0,0 + .rva se_handler +___ +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/asm/x86_64-mont.S b/app/openssl/crypto/bn/asm/x86_64-mont.S new file mode 100644 index 00000000..95e29052 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86_64-mont.S @@ -0,0 +1,1374 @@ +.text + +.globl bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + cmpq %rsi,%rdx + jne .Lmul4x_enter + jmp .Lsqr4x_enter + +.align 16 +.Lmul_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 2(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jl .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.align 16 +.Lcopy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.Lmul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jl .Louter4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp .Lsub4x +.align 16 +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 + decq %r15 + jnz .Lcopy4x + + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont,.-bn_mul4x_mont +.type bn_sqr4x_mont,@function +.align 16 +bn_sqr4x_mont: +.Lsqr4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d + xorq %r10,%r10 + movq %rsp,%r11 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,2),%rsp + andq $-1024,%rsp + + + + + + + + + + + + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + movq %r8,48(%rsp) + movq %r11,56(%rsp) +.Lsqr4x_body: + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + + + movq 8(%rsi,%rcx,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 16 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + movq 16(%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,16(%rdi,%rcx,1) + + + movq 24(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 32(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + leaq 16(%rbp),%rbp + movq %r12,8(%rdi) + jmp .Lsqr4x_outer + +.align 16 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + movq -24(%rdi,%rbp,1),%r10 + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + addq -16(%rdi,%rbp,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + xorq %r12,%r12 + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + leaq 16(%rcx),%rcx + jmp .Lsqr4x_inner + +.align 16 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi) + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi) + + movq -8(%rsi),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + + xorq %r11,%r11 + addq %r12,%r10 + movq %rdx,%r13 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi) + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 64(%rsp,%r9,2),%rdi + xorq %r10,%r10 + movq -24(%rdi,%rbp,2),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,-40(%rdi,%rbp,2) + sbbq %r15,%r15 + jmp .Lsqr4x_shift_n_add + +.align 16 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi,%rbp,2) + adcq %rdx,%r8 + movq %r8,24(%rdi,%rbp,2) + sbbq %r15,%r15 + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) + movq 40(%rsp),%rsi + movq 48(%rsp),%r8 + xorq %rcx,%rcx + movq %r9,0(%rsp) + subq %r9,%rcx + movq 64(%rsp),%r10 + movq %r8,%r14 + leaq 64(%rsp,%r9,2),%rax + leaq 64(%rsp,%r9,1),%rdi + movq %rax,8(%rsp) + leaq (%rsi,%r9,1),%rsi + xorq %rbp,%rbp + + movq 0(%rsi,%rcx,1),%rax + movq 8(%rsi,%rcx,1),%r9 + imulq %r10,%r14 + movq %rax,%rbx + jmp .Lsqr4x_mont_outer + +.align 16 +.Lsqr4x_mont_outer: + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r8,%r15 + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + imulq %r11,%r15 + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + jmp .Lsqr4x_mont_inner + +.align 16 +.Lsqr4x_mont_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq (%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 8(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + cmpq $0,%rcx + jne .Lsqr4x_mont_inner + + subq 0(%rsp),%rcx + movq %r8,%r14 + + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi) + + xorq %r11,%r11 + addq (%rdi),%r10 + adcq $0,%r11 + movq 0(%rsi,%rcx,1),%rbx + addq %rbp,%r10 + adcq $0,%r11 + + imulq 16(%rdi,%rcx,1),%r14 + xorq %r12,%r12 + movq 8(%rsi,%rcx,1),%r9 + addq %r10,%r13 + movq 16(%rdi,%rcx,1),%r10 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + movq %r13,(%rdi) + + xorq %rbp,%rbp + addq 8(%rdi),%r12 + adcq %rbp,%rbp + addq %r11,%r12 + leaq 16(%rdi),%rdi + adcq $0,%rbp + movq %r12,-8(%rdi) + cmpq 8(%rsp),%rdi + jb .Lsqr4x_mont_outer + + movq 0(%rsp),%r9 + movq %rbp,(%rdi) + movq 64(%rsp,%r9,1),%rax + leaq 64(%rsp,%r9,1),%rbx + movq 40(%rsp),%rsi + shrq $5,%r9 + movq 8(%rbx),%rdx + xorq %rbp,%rbp + + movq 32(%rsp),%rdi + subq 0(%rsi),%rax + movq 16(%rbx),%r10 + movq 24(%rbx),%r11 + sbbq 8(%rsi),%rdx + leaq -1(%r9),%rcx + jmp .Lsqr4x_sub +.align 16 +.Lsqr4x_sub: + movq %rax,0(%rdi,%rbp,8) + movq %rdx,8(%rdi,%rbp,8) + sbbq 16(%rsi,%rbp,8),%r10 + movq 32(%rbx,%rbp,8),%rax + movq 40(%rbx,%rbp,8),%rdx + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + movq %r11,24(%rdi,%rbp,8) + sbbq 32(%rsi,%rbp,8),%rax + movq 48(%rbx,%rbp,8),%r10 + movq 56(%rbx,%rbp,8),%r11 + sbbq 40(%rsi,%rbp,8),%rdx + leaq 4(%rbp),%rbp + decq %rcx + jnz .Lsqr4x_sub + + movq %rax,0(%rdi,%rbp,8) + movq 32(%rbx,%rbp,8),%rax + sbbq 16(%rsi,%rbp,8),%r10 + movq %rdx,8(%rdi,%rbp,8) + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + + sbbq $0,%rax + movq %r11,24(%rdi,%rbp,8) + xorq %rbp,%rbp + andq %rax,%rbx + notq %rax + movq %rdi,%rsi + andq %rax,%rsi + leaq -1(%r9),%rcx + orq %rsi,%rbx + + pxor %xmm0,%xmm0 + leaq 64(%rsp,%r9,8),%rsi + movdqu (%rbx),%xmm1 + leaq (%rsi,%r9,8),%rsi + movdqa %xmm0,64(%rsp) + movdqa %xmm0,(%rsi) + movdqu %xmm1,(%rdi) + jmp .Lsqr4x_copy +.align 16 +.Lsqr4x_copy: + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqu 32(%rbx,%rbp,1),%xmm1 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,96(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqa %xmm0,32(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movdqu %xmm1,32(%rdi,%rbp,1) + leaq 32(%rbp),%rbp + decq %rcx + jnz .Lsqr4x_copy + + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movq 56(%rsp),%rsi + movq $1,%rax + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lsqr4x_epilogue: + .byte 0xf3,0xc3 +.size bn_sqr4x_mont,.-bn_sqr4x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/app/openssl/crypto/bn/asm/x86_64-mont.pl b/app/openssl/crypto/bn/asm/x86_64-mont.pl index 3b7a6f24..17fb94c8 100755 --- a/app/openssl/crypto/bn/asm/x86_64-mont.pl +++ b/app/openssl/crypto/bn/asm/x86_64-mont.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -15,6 +15,20 @@ # respectful 50%. It remains to be seen if loop unrolling and # dedicated squaring routine can provide further improvement... +# July 2011. +# +# Add dedicated squaring procedure. Performance improvement varies +# from platform to platform, but in average it's ~5%/15%/25%/33% +# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively. + +# August 2011. +# +# Unroll and modulo-schedule inner loops in such manner that they +# are "fallen through" for input lengths of 8, which is critical for +# 1024-bit RSA *sign*. Average performance improvement in comparison +# to *initial* version of this module from 2005 is ~0%/30%/40%/45% +# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively. + $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } @@ -26,7 +40,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; # int bn_mul_mont( $rp="%rdi"; # BN_ULONG *rp, @@ -37,7 +52,6 @@ $n0="%r8"; # const BN_ULONG *n0, $num="%r9"; # int num); $lo0="%r10"; $hi0="%r11"; -$bp="%r12"; # reassign $bp $hi1="%r13"; $i="%r14"; $j="%r15"; @@ -51,6 +65,16 @@ $code=<<___; .type bn_mul_mont,\@function,6 .align 16 bn_mul_mont: + test \$3,${num}d + jnz .Lmul_enter + cmp \$8,${num}d + jb .Lmul_enter + cmp $ap,$bp + jne .Lmul4x_enter + jmp .Lsqr4x_enter + +.align 16 +.Lmul_enter: push %rbx push %rbp push %r12 @@ -66,48 +90,66 @@ bn_mul_mont: and \$-1024,%rsp # minimize TLB usage mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp -.Lprologue: - mov %rdx,$bp # $bp reassigned, remember? - +.Lmul_body: + mov $bp,%r12 # reassign $bp +___ + $bp="%r12"; +$code.=<<___; mov ($n0),$n0 # pull n0[0] value + mov ($bp),$m0 # m0=bp[0] + mov ($ap),%rax xor $i,$i # i=0 xor $j,$j # j=0 - mov ($bp),$m0 # m0=bp[0] - mov ($ap),%rax + mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$lo0 - mov %rdx,$hi0 + mov ($np),%rax - imulq $n0,%rax # "tp[0]"*n0 - mov %rax,$m1 + imulq $lo0,$m1 # "tp[0]"*n0 + mov %rdx,$hi0 - mulq ($np) # np[0]*m1 - add $lo0,%rax # discarded + mulq $m1 # np[0]*m1 + add %rax,$lo0 # discarded + mov 8($ap),%rax adc \$0,%rdx mov %rdx,$hi1 lea 1($j),$j # j++ + jmp .L1st_enter + +.align 16 .L1st: + add %rax,$hi1 mov ($ap,$j,8),%rax - mulq $m0 # ap[j]*bp[0] - add $hi0,%rax adc \$0,%rdx - mov %rax,$lo0 + add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] + mov $lo0,$hi0 + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + +.L1st_enter: + mulq $m0 # ap[j]*bp[0] + add %rax,$hi0 mov ($np,$j,8),%rax - mov %rdx,$hi0 + adc \$0,%rdx + lea 1($j),$j # j++ + mov %rdx,$lo0 mulq $m1 # np[j]*m1 - add $hi1,%rax - lea 1($j),$j # j++ + cmp $num,$j + jne .L1st + + add %rax,$hi1 + mov ($ap),%rax # ap[0] adc \$0,%rdx - add $lo0,%rax # np[j]*m1+ap[j]*bp[0] + add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov %rax,-16(%rsp,$j,8) # tp[j-1] - cmp $num,$j + mov $hi1,-16(%rsp,$j,8) # tp[j-1] mov %rdx,$hi1 - jl .L1st + mov $lo0,$hi0 xor %rdx,%rdx add $hi0,$hi1 @@ -116,50 +158,64 @@ bn_mul_mont: mov %rdx,(%rsp,$num,8) # store upmost overflow bit lea 1($i),$i # i++ -.align 4 + jmp .Louter +.align 16 .Louter: - xor $j,$j # j=0 - mov ($bp,$i,8),$m0 # m0=bp[i] - mov ($ap),%rax # ap[0] + xor $j,$j # j=0 + mov $n0,$m1 + mov (%rsp),$lo0 mulq $m0 # ap[0]*bp[i] - add (%rsp),%rax # ap[0]*bp[i]+tp[0] + add %rax,$lo0 # ap[0]*bp[i]+tp[0] + mov ($np),%rax adc \$0,%rdx - mov %rax,$lo0 - mov %rdx,$hi0 - imulq $n0,%rax # tp[0]*n0 - mov %rax,$m1 + imulq $lo0,$m1 # tp[0]*n0 + mov %rdx,$hi0 - mulq ($np,$j,8) # np[0]*m1 - add $lo0,%rax # discarded - mov 8(%rsp),$lo0 # tp[1] + mulq $m1 # np[0]*m1 + add %rax,$lo0 # discarded + mov 8($ap),%rax adc \$0,%rdx + mov 8(%rsp),$lo0 # tp[1] mov %rdx,$hi1 lea 1($j),$j # j++ -.align 4 + jmp .Linner_enter + +.align 16 .Linner: + add %rax,$hi1 mov ($ap,$j,8),%rax - mulq $m0 # ap[j]*bp[i] - add $hi0,%rax adc \$0,%rdx - add %rax,$lo0 # ap[j]*bp[i]+tp[j] + add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] + mov (%rsp,$j,8),$lo0 + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + +.Linner_enter: + mulq $m0 # ap[j]*bp[i] + add %rax,$hi0 mov ($np,$j,8),%rax adc \$0,%rdx + add $hi0,$lo0 # ap[j]*bp[i]+tp[j] mov %rdx,$hi0 + adc \$0,$hi0 + lea 1($j),$j # j++ mulq $m1 # np[j]*m1 - add $hi1,%rax - lea 1($j),$j # j++ - adc \$0,%rdx - add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j] + cmp $num,$j + jne .Linner + + add %rax,$hi1 + mov ($ap),%rax # ap[0] adc \$0,%rdx + add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] mov (%rsp,$j,8),$lo0 - cmp $num,$j - mov %rax,-16(%rsp,$j,8) # tp[j-1] + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] mov %rdx,$hi1 - jl .Linner xor %rdx,%rdx add $hi0,$hi1 @@ -173,35 +229,449 @@ bn_mul_mont: cmp $num,$i jl .Louter - lea (%rsp),$ap # borrow ap for tp - lea -1($num),$j # j=num-1 - - mov ($ap),%rax # tp[0] xor $i,$i # i=0 and clear CF! + mov (%rsp),%rax # tp[0] + lea (%rsp),$ap # borrow ap for tp + mov $num,$j # j=num jmp .Lsub .align 16 .Lsub: sbb ($np,$i,8),%rax mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] - dec $j # doesn't affect CF! mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - jge .Lsub + dec $j # doesnn't affect CF! + jnz .Lsub sbb \$0,%rax # handle upmost overflow bit + xor $i,$i and %rax,$ap not %rax mov $rp,$np and %rax,$np - lea -1($num),$j + mov $num,$j # j=num or $np,$ap # ap=borrow?tp:rp .align 16 .Lcopy: # copy or in-place refresh + mov ($ap,$i,8),%rax + mov $i,(%rsp,$i,8) # zap temporary vector + mov %rax,($rp,$i,8) # rp[i]=tp[i] + lea 1($i),$i + sub \$1,$j + jnz .Lcopy + + mov 8(%rsp,$num,8),%rsi # restore %rsp + mov \$1,%rax + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lmul_epilogue: + ret +.size bn_mul_mont,.-bn_mul_mont +___ +{{{ +my @A=("%r10","%r11"); +my @N=("%r13","%rdi"); +$code.=<<___; +.type bn_mul4x_mont,\@function,6 +.align 16 +bn_mul4x_mont: +.Lmul4x_enter: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + mov ${num}d,${num}d + lea 4($num),%r10 + mov %rsp,%r11 + neg %r10 + lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+4)) + and \$-1024,%rsp # minimize TLB usage + + mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp +.Lmul4x_body: + mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp + mov %rdx,%r12 # reassign $bp +___ + $bp="%r12"; +$code.=<<___; + mov ($n0),$n0 # pull n0[0] value + mov ($bp),$m0 # m0=bp[0] + mov ($ap),%rax + + xor $i,$i # i=0 + xor $j,$j # j=0 + + mov $n0,$m1 + mulq $m0 # ap[0]*bp[0] + mov %rax,$A[0] + mov ($np),%rax + + imulq $A[0],$m1 # "tp[0]"*n0 + mov %rdx,$A[1] + + mulq $m1 # np[0]*m1 + add %rax,$A[0] # discarded + mov 8($ap),%rax + adc \$0,%rdx + mov %rdx,$N[1] + + mulq $m0 + add %rax,$A[1] + mov 8($np),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 + add %rax,$N[1] + mov 16($ap),%rax + adc \$0,%rdx + add $A[1],$N[1] + lea 4($j),$j # j++ + adc \$0,%rdx + mov $N[1],(%rsp) + mov %rdx,$N[0] + jmp .L1st4x +.align 16 +.L1st4x: + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] mov ($ap,$j,8),%rax - mov %rax,($rp,$j,8) # rp[i]=tp[i] - mov $i,(%rsp,$j,8) # zap temporary vector + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov ($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov 8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-8(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov 8($np,$j,8),%rax + adc \$0,%rdx + lea 4($j),$j # j++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov -16($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + cmp $num,$j + jl .L1st4x + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + xor $N[1],$N[1] + add $A[0],$N[0] + adc \$0,$N[1] + mov $N[0],-8(%rsp,$j,8) + mov $N[1],(%rsp,$j,8) # store upmost overflow bit + + lea 1($i),$i # i++ +.align 4 +.Louter4x: + mov ($bp,$i,8),$m0 # m0=bp[i] + xor $j,$j # j=0 + mov (%rsp),$A[0] + mov $n0,$m1 + mulq $m0 # ap[0]*bp[i] + add %rax,$A[0] # ap[0]*bp[i]+tp[0] + mov ($np),%rax + adc \$0,%rdx + + imulq $A[0],$m1 # tp[0]*n0 + mov %rdx,$A[1] + + mulq $m1 # np[0]*m1 + add %rax,$A[0] # "$N[0]", discarded + mov 8($ap),%rax + adc \$0,%rdx + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov 8($np),%rax + adc \$0,%rdx + add 8(%rsp),$A[1] # +tp[1] + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov 16($ap),%rax + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] + lea 4($j),$j # j+=2 + adc \$0,%rdx + mov $N[1],(%rsp) # tp[j-1] + mov %rdx,$N[0] + jmp .Linner4x +.align 16 +.Linner4x: + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + add -8(%rsp,$j,8),$A[1] + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov ($np,$j,8),%rax + adc \$0,%rdx + add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov 8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[0],-8(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov 8($np,$j,8),%rax + adc \$0,%rdx + add 8(%rsp,$j,8),$A[1] + adc \$0,%rdx + lea 4($j),$j # j++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov -16($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + cmp $num,$j + jl .Linner4x + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + add -8(%rsp,$j,8),$A[1] + adc \$0,%rdx + lea 1($i),$i # i++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + xor $N[1],$N[1] + add $A[0],$N[0] + adc \$0,$N[1] + add (%rsp,$num,8),$N[0] # pull upmost overflow bit + adc \$0,$N[1] + mov $N[0],-8(%rsp,$j,8) + mov $N[1],(%rsp,$j,8) # store upmost overflow bit + + cmp $num,$i + jl .Louter4x +___ +{ +my @ri=("%rax","%rdx",$m0,$m1); +$code.=<<___; + mov 16(%rsp,$num,8),$rp # restore $rp + mov 0(%rsp),@ri[0] # tp[0] + pxor %xmm0,%xmm0 + mov 8(%rsp),@ri[1] # tp[1] + shr \$2,$num # num/=4 + lea (%rsp),$ap # borrow ap for tp + xor $i,$i # i=0 and clear CF! + + sub 0($np),@ri[0] + mov 16($ap),@ri[2] # tp[2] + mov 24($ap),@ri[3] # tp[3] + sbb 8($np),@ri[1] + lea -1($num),$j # j=num/4-1 + jmp .Lsub4x +.align 16 +.Lsub4x: + mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 16($np,$i,8),@ri[2] + mov 32($ap,$i,8),@ri[0] # tp[i+1] + mov 40($ap,$i,8),@ri[1] + sbb 24($np,$i,8),@ri[3] + mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 32($np,$i,8),@ri[0] + mov 48($ap,$i,8),@ri[2] + mov 56($ap,$i,8),@ri[3] + sbb 40($np,$i,8),@ri[1] + lea 4($i),$i # i++ + dec $j # doesnn't affect CF! + jnz .Lsub4x + + mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] + mov 32($ap,$i,8),@ri[0] # load overflow bit + sbb 16($np,$i,8),@ri[2] + mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 24($np,$i,8),@ri[3] + mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] + + sbb \$0,@ri[0] # handle upmost overflow bit + mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] + xor $i,$i # i=0 + and @ri[0],$ap + not @ri[0] + mov $rp,$np + and @ri[0],$np + lea -1($num),$j + or $np,$ap # ap=borrow?tp:rp + + movdqu ($ap),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,($rp) + jmp .Lcopy4x +.align 16 +.Lcopy4x: # copy or in-place refresh + movdqu 16($ap,$i),%xmm2 + movdqu 32($ap,$i),%xmm1 + movdqa %xmm0,16(%rsp,$i) + movdqu %xmm2,16($rp,$i) + movdqa %xmm0,32(%rsp,$i) + movdqu %xmm1,32($rp,$i) + lea 32($i),$i dec $j - jge .Lcopy + jnz .Lcopy4x + shl \$2,$num + movdqu 16($ap,$i),%xmm2 + movdqa %xmm0,16(%rsp,$i) + movdqu %xmm2,16($rp,$i) +___ +} +$code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax mov (%rsi),%r15 @@ -211,9 +681,823 @@ bn_mul_mont: mov 32(%rsi),%rbp mov 40(%rsi),%rbx lea 48(%rsi),%rsp -.Lepilogue: +.Lmul4x_epilogue: ret -.size bn_mul_mont,.-bn_mul_mont +.size bn_mul4x_mont,.-bn_mul4x_mont +___ +}}} + {{{ +###################################################################### +# void bn_sqr4x_mont( +my $rptr="%rdi"; # const BN_ULONG *rptr, +my $aptr="%rsi"; # const BN_ULONG *aptr, +my $bptr="%rdx"; # not used +my $nptr="%rcx"; # const BN_ULONG *nptr, +my $n0 ="%r8"; # const BN_ULONG *n0); +my $num ="%r9"; # int num, has to be divisible by 4 and + # not less than 8 + +my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); +my @A0=("%r10","%r11"); +my @A1=("%r12","%r13"); +my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); + +$code.=<<___; +.type bn_sqr4x_mont,\@function,6 +.align 16 +bn_sqr4x_mont: +.Lsqr4x_enter: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + shl \$3,${num}d # convert $num to bytes + xor %r10,%r10 + mov %rsp,%r11 # put aside %rsp + sub $num,%r10 # -$num + mov ($n0),$n0 # *n0 + lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num) + and \$-1024,%rsp # minimize TLB usage + ############################################################## + # Stack layout + # + # +0 saved $num, used in reduction section + # +8 &t[2*$num], used in reduction section + # +32 saved $rptr + # +40 saved $nptr + # +48 saved *n0 + # +56 saved %rsp + # +64 t[2*$num] + # + mov $rptr,32(%rsp) # save $rptr + mov $nptr,40(%rsp) + mov $n0, 48(%rsp) + mov %r11, 56(%rsp) # save original %rsp +.Lsqr4x_body: + ############################################################## + # Squaring part: + # + # a) multiply-n-add everything but a[i]*a[i]; + # b) shift result of a) by 1 to the left and accumulate + # a[i]*a[i] products; + # + lea 32(%r10),$i # $i=-($num-32) + lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2] + + mov $num,$j # $j=$num + + # comments apply to $num==8 case + mov -32($aptr,$i),$a0 # a[0] + lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr,$i),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr,$i),$ai # a[2] + mov %rax,$a1 + + mul $a0 # a[1]*a[0] + mov %rax,$A0[0] # a[1]*a[0] + mov $ai,%rax # a[2] + mov %rdx,$A0[1] + mov $A0[0],-24($tptr,$i) # t[1] + + xor $A0[0],$A0[0] + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + adc %rdx,$A0[0] + mov $A0[1],-16($tptr,$i) # t[2] + + lea -16($i),$j # j=-16 + + + mov 8($aptr,$j),$ai # a[3] + mul $a1 # a[2]*a[1] + mov %rax,$A1[0] # a[2]*a[1]+t[3] + mov $ai,%rax + mov %rdx,$A1[1] + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + lea 16($j),$j + adc \$0,$A0[1] + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],-8($tptr,$j) # t[3] + jmp .Lsqr4x_1st + +.align 16 +.Lsqr4x_1st: + mov ($aptr,$j),$ai # a[4] + xor $A1[0],$A1[0] + mul $a1 # a[3]*a[1] + add %rax,$A1[1] # a[3]*a[1]+t[4] + mov $ai,%rax + adc %rdx,$A1[0] + + xor $A0[0],$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + mul $a0 # a[4]*a[0] + add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] + mov $ai,%rax # a[3] + adc %rdx,$A0[0] + mov $A0[1],($tptr,$j) # t[4] + + + mov 8($aptr,$j),$ai # a[5] + xor $A1[1],$A1[1] + mul $a1 # a[4]*a[3] + add %rax,$A1[0] # a[4]*a[3]+t[5] + mov $ai,%rax + adc %rdx,$A1[1] + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + mul $a0 # a[5]*a[2] + add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],8($tptr,$j) # t[5] + + mov 16($aptr,$j),$ai # a[6] + xor $A1[0],$A1[0] + mul $a1 # a[5]*a[3] + add %rax,$A1[1] # a[5]*a[3]+t[6] + mov $ai,%rax + adc %rdx,$A1[0] + + xor $A0[0],$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + mul $a0 # a[6]*a[2] + add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6] + mov $ai,%rax # a[3] + adc %rdx,$A0[0] + mov $A0[1],16($tptr,$j) # t[6] + + + mov 24($aptr,$j),$ai # a[7] + xor $A1[1],$A1[1] + mul $a1 # a[6]*a[5] + add %rax,$A1[0] # a[6]*a[5]+t[7] + mov $ai,%rax + adc %rdx,$A1[1] + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + lea 32($j),$j + adc \$0,$A0[1] + mul $a0 # a[7]*a[4] + add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],-8($tptr,$j) # t[7] + + cmp \$0,$j + jne .Lsqr4x_1st + + xor $A1[0],$A1[0] + add $A0[1],$A1[1] + adc \$0,$A1[0] + mul $a1 # a[7]*a[5] + add %rax,$A1[1] + adc %rdx,$A1[0] + + mov $A1[1],($tptr) # t[8] + lea 16($i),$i + mov $A1[0],8($tptr) # t[9] + jmp .Lsqr4x_outer + +.align 16 +.Lsqr4x_outer: # comments apply to $num==6 case + mov -32($aptr,$i),$a0 # a[0] + lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr,$i),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr,$i),$ai # a[2] + mov %rax,$a1 + + mov -24($tptr,$i),$A0[0] # t[1] + xor $A0[1],$A0[1] + mul $a0 # a[1]*a[0] + add %rax,$A0[0] # a[1]*a[0]+t[1] + mov $ai,%rax # a[2] + adc %rdx,$A0[1] + mov $A0[0],-24($tptr,$i) # t[1] + + xor $A0[0],$A0[0] + add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2] + adc \$0,$A0[0] + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + adc %rdx,$A0[0] + mov $A0[1],-16($tptr,$i) # t[2] + + lea -16($i),$j # j=-16 + xor $A1[0],$A1[0] + + + mov 8($aptr,$j),$ai # a[3] + xor $A1[1],$A1[1] + add 8($tptr,$j),$A1[0] + adc \$0,$A1[1] + mul $a1 # a[2]*a[1] + add %rax,$A1[0] # a[2]*a[1]+t[3] + mov $ai,%rax + adc %rdx,$A1[1] + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],8($tptr,$j) # t[3] + + lea 16($j),$j + jmp .Lsqr4x_inner + +.align 16 +.Lsqr4x_inner: + mov ($aptr,$j),$ai # a[4] + xor $A1[0],$A1[0] + add ($tptr,$j),$A1[1] + adc \$0,$A1[0] + mul $a1 # a[3]*a[1] + add %rax,$A1[1] # a[3]*a[1]+t[4] + mov $ai,%rax + adc %rdx,$A1[0] + + xor $A0[0],$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + mul $a0 # a[4]*a[0] + add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] + mov $ai,%rax # a[3] + adc %rdx,$A0[0] + mov $A0[1],($tptr,$j) # t[4] + + mov 8($aptr,$j),$ai # a[5] + xor $A1[1],$A1[1] + add 8($tptr,$j),$A1[0] + adc \$0,$A1[1] + mul $a1 # a[4]*a[3] + add %rax,$A1[0] # a[4]*a[3]+t[5] + mov $ai,%rax + adc %rdx,$A1[1] + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + lea 16($j),$j # j++ + adc \$0,$A0[1] + mul $a0 # a[5]*a[2] + add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below + + cmp \$0,$j + jne .Lsqr4x_inner + + xor $A1[0],$A1[0] + add $A0[1],$A1[1] + adc \$0,$A1[0] + mul $a1 # a[5]*a[3] + add %rax,$A1[1] + adc %rdx,$A1[0] + + mov $A1[1],($tptr) # t[6], "preloaded t[2]" below + mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below + + add \$16,$i + jnz .Lsqr4x_outer + + # comments apply to $num==4 case + mov -32($aptr),$a0 # a[0] + lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr),$ai # a[2] + mov %rax,$a1 + + xor $A0[1],$A0[1] + mul $a0 # a[1]*a[0] + add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1] + mov $ai,%rax # a[2] + adc %rdx,$A0[1] + mov $A0[0],-24($tptr) # t[1] + + xor $A0[0],$A0[0] + add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2] + adc \$0,$A0[0] + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + adc %rdx,$A0[0] + mov $A0[1],-16($tptr) # t[2] + + mov -8($aptr),$ai # a[3] + mul $a1 # a[2]*a[1] + add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3] + mov $ai,%rax + adc \$0,%rdx + + xor $A0[1],$A0[1] + add $A1[0],$A0[0] + mov %rdx,$A1[1] + adc \$0,$A0[1] + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + adc %rdx,$A0[1] + mov $A0[0],-8($tptr) # t[3] + + xor $A1[0],$A1[0] + add $A0[1],$A1[1] + adc \$0,$A1[0] + mul $a1 # a[3]*a[1] + add %rax,$A1[1] + mov -16($aptr),%rax # a[2] + adc %rdx,$A1[0] + + mov $A1[1],($tptr) # t[4] + mov $A1[0],8($tptr) # t[5] + + mul $ai # a[2]*a[3] +___ +{ +my ($shift,$carry)=($a0,$a1); +my @S=(@A1,$ai,$n0); +$code.=<<___; + add \$16,$i + xor $shift,$shift + sub $num,$i # $i=16-$num + xor $carry,$carry + + add $A1[0],%rax # t[5] + adc \$0,%rdx + mov %rax,8($tptr) # t[5] + mov %rdx,16($tptr) # t[6] + mov $carry,24($tptr) # t[7] + + mov -16($aptr,$i),%rax # a[0] + lea 64(%rsp,$num,2),$tptr + xor $A0[0],$A0[0] # t[0] + mov -24($tptr,$i,2),$A0[1] # t[1] + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],-32($tptr,$i,2) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + mov $S[1],-24($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 0($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],-16($tptr,$i,2) + adc %rdx,$S[3] + lea 16($i),$i + mov $S[3],-40($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + jmp .Lsqr4x_shift_n_add + +.align 16 +.Lsqr4x_shift_n_add: + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],-32($tptr,$i,2) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + mov $S[1],-24($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 0($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],-16($tptr,$i,2) + adc %rdx,$S[3] + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + mov $S[3],-8($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov 8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],0($tptr,$i,2) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + mov $S[1],8($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 16($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],16($tptr,$i,2) + adc %rdx,$S[3] + mov $S[3],24($tptr,$i,2) + sbb $carry,$carry # mov cf,$carry + add \$32,$i + jnz .Lsqr4x_shift_n_add + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov -16($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr),%rax # a[i+1] # prefetch + mov $S[0],-32($tptr) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift + mov $S[1],-24($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + adc %rax,$S[2] + adc %rdx,$S[3] + mov $S[2],-16($tptr) + mov $S[3],-8($tptr) +___ +} +############################################################## +# Montgomery reduction part, "word-by-word" algorithm. +# +{ +my ($topbit,$nptr)=("%rbp",$aptr); +my ($m0,$m1)=($a0,$a1); +my @Ni=("%rbx","%r9"); +$code.=<<___; + mov 40(%rsp),$nptr # restore $nptr + mov 48(%rsp),$n0 # restore *n0 + xor $j,$j + mov $num,0(%rsp) # save $num + sub $num,$j # $j=-$num + mov 64(%rsp),$A0[0] # t[0] # modsched # + mov $n0,$m0 # # modsched # + lea 64(%rsp,$num,2),%rax # end of t[] buffer + lea 64(%rsp,$num),$tptr # end of t[] window + mov %rax,8(%rsp) # save end of t[] buffer + lea ($nptr,$num),$nptr # end of n[] buffer + xor $topbit,$topbit # $topbit=0 + + mov 0($nptr,$j),%rax # n[0] # modsched # + mov 8($nptr,$j),$Ni[1] # n[1] # modsched # + imulq $A0[0],$m0 # m0=t[0]*n0 # modsched # + mov %rax,$Ni[0] # # modsched # + jmp .Lsqr4x_mont_outer + +.align 16 +.Lsqr4x_mont_outer: + xor $A0[1],$A0[1] + mul $m0 # n[0]*m0 + add %rax,$A0[0] # n[0]*m0+t[0] + mov $Ni[1],%rax + adc %rdx,$A0[1] + mov $n0,$m1 + + xor $A0[0],$A0[0] + add 8($tptr,$j),$A0[1] + adc \$0,$A0[0] + mul $m0 # n[1]*m0 + add %rax,$A0[1] # n[1]*m0+t[1] + mov $Ni[0],%rax + adc %rdx,$A0[0] + + imulq $A0[1],$m1 + + mov 16($nptr,$j),$Ni[0] # n[2] + xor $A1[1],$A1[1] + add $A0[1],$A1[0] + adc \$0,$A1[1] + mul $m1 # n[0]*m1 + add %rax,$A1[0] # n[0]*m1+"t[1]" + mov $Ni[0],%rax + adc %rdx,$A1[1] + mov $A1[0],8($tptr,$j) # "t[1]" + + xor $A0[1],$A0[1] + add 16($tptr,$j),$A0[0] + adc \$0,$A0[1] + mul $m0 # n[2]*m0 + add %rax,$A0[0] # n[2]*m0+t[2] + mov $Ni[1],%rax + adc %rdx,$A0[1] + + mov 24($nptr,$j),$Ni[1] # n[3] + xor $A1[0],$A1[0] + add $A0[0],$A1[1] + adc \$0,$A1[0] + mul $m1 # n[1]*m1 + add %rax,$A1[1] # n[1]*m1+"t[2]" + mov $Ni[1],%rax + adc %rdx,$A1[0] + mov $A1[1],16($tptr,$j) # "t[2]" + + xor $A0[0],$A0[0] + add 24($tptr,$j),$A0[1] + lea 32($j),$j + adc \$0,$A0[0] + mul $m0 # n[3]*m0 + add %rax,$A0[1] # n[3]*m0+t[3] + mov $Ni[0],%rax + adc %rdx,$A0[0] + jmp .Lsqr4x_mont_inner + +.align 16 +.Lsqr4x_mont_inner: + mov ($nptr,$j),$Ni[0] # n[4] + xor $A1[1],$A1[1] + add $A0[1],$A1[0] + adc \$0,$A1[1] + mul $m1 # n[2]*m1 + add %rax,$A1[0] # n[2]*m1+"t[3]" + mov $Ni[0],%rax + adc %rdx,$A1[1] + mov $A1[0],-8($tptr,$j) # "t[3]" + + xor $A0[1],$A0[1] + add ($tptr,$j),$A0[0] + adc \$0,$A0[1] + mul $m0 # n[4]*m0 + add %rax,$A0[0] # n[4]*m0+t[4] + mov $Ni[1],%rax + adc %rdx,$A0[1] + + mov 8($nptr,$j),$Ni[1] # n[5] + xor $A1[0],$A1[0] + add $A0[0],$A1[1] + adc \$0,$A1[0] + mul $m1 # n[3]*m1 + add %rax,$A1[1] # n[3]*m1+"t[4]" + mov $Ni[1],%rax + adc %rdx,$A1[0] + mov $A1[1],($tptr,$j) # "t[4]" + + xor $A0[0],$A0[0] + add 8($tptr,$j),$A0[1] + adc \$0,$A0[0] + mul $m0 # n[5]*m0 + add %rax,$A0[1] # n[5]*m0+t[5] + mov $Ni[0],%rax + adc %rdx,$A0[0] + + + mov 16($nptr,$j),$Ni[0] # n[6] + xor $A1[1],$A1[1] + add $A0[1],$A1[0] + adc \$0,$A1[1] + mul $m1 # n[4]*m1 + add %rax,$A1[0] # n[4]*m1+"t[5]" + mov $Ni[0],%rax + adc %rdx,$A1[1] + mov $A1[0],8($tptr,$j) # "t[5]" + + xor $A0[1],$A0[1] + add 16($tptr,$j),$A0[0] + adc \$0,$A0[1] + mul $m0 # n[6]*m0 + add %rax,$A0[0] # n[6]*m0+t[6] + mov $Ni[1],%rax + adc %rdx,$A0[1] + + mov 24($nptr,$j),$Ni[1] # n[7] + xor $A1[0],$A1[0] + add $A0[0],$A1[1] + adc \$0,$A1[0] + mul $m1 # n[5]*m1 + add %rax,$A1[1] # n[5]*m1+"t[6]" + mov $Ni[1],%rax + adc %rdx,$A1[0] + mov $A1[1],16($tptr,$j) # "t[6]" + + xor $A0[0],$A0[0] + add 24($tptr,$j),$A0[1] + lea 32($j),$j + adc \$0,$A0[0] + mul $m0 # n[7]*m0 + add %rax,$A0[1] # n[7]*m0+t[7] + mov $Ni[0],%rax + adc %rdx,$A0[0] + cmp \$0,$j + jne .Lsqr4x_mont_inner + + sub 0(%rsp),$j # $j=-$num # modsched # + mov $n0,$m0 # # modsched # + + xor $A1[1],$A1[1] + add $A0[1],$A1[0] + adc \$0,$A1[1] + mul $m1 # n[6]*m1 + add %rax,$A1[0] # n[6]*m1+"t[7]" + mov $Ni[1],%rax + adc %rdx,$A1[1] + mov $A1[0],-8($tptr) # "t[7]" + + xor $A0[1],$A0[1] + add ($tptr),$A0[0] # +t[8] + adc \$0,$A0[1] + mov 0($nptr,$j),$Ni[0] # n[0] # modsched # + add $topbit,$A0[0] + adc \$0,$A0[1] + + imulq 16($tptr,$j),$m0 # m0=t[0]*n0 # modsched # + xor $A1[0],$A1[0] + mov 8($nptr,$j),$Ni[1] # n[1] # modsched # + add $A0[0],$A1[1] + mov 16($tptr,$j),$A0[0] # t[0] # modsched # + adc \$0,$A1[0] + mul $m1 # n[7]*m1 + add %rax,$A1[1] # n[7]*m1+"t[8]" + mov $Ni[0],%rax # # modsched # + adc %rdx,$A1[0] + mov $A1[1],($tptr) # "t[8]" + + xor $topbit,$topbit + add 8($tptr),$A1[0] # +t[9] + adc $topbit,$topbit + add $A0[1],$A1[0] + lea 16($tptr),$tptr # "t[$num]>>128" + adc \$0,$topbit + mov $A1[0],-8($tptr) # "t[9]" + cmp 8(%rsp),$tptr # are we done? + jb .Lsqr4x_mont_outer + + mov 0(%rsp),$num # restore $num + mov $topbit,($tptr) # save $topbit +___ +} +############################################################## +# Post-condition, 4x unrolled copy from bn_mul_mont +# +{ +my ($tptr,$nptr)=("%rbx",$aptr); +my @ri=("%rax","%rdx","%r10","%r11"); +$code.=<<___; + mov 64(%rsp,$num),@ri[0] # tp[0] + lea 64(%rsp,$num),$tptr # upper half of t[2*$num] holds result + mov 40(%rsp),$nptr # restore $nptr + shr \$5,$num # num/4 + mov 8($tptr),@ri[1] # t[1] + xor $i,$i # i=0 and clear CF! + + mov 32(%rsp),$rptr # restore $rptr + sub 0($nptr),@ri[0] + mov 16($tptr),@ri[2] # t[2] + mov 24($tptr),@ri[3] # t[3] + sbb 8($nptr),@ri[1] + lea -1($num),$j # j=num/4-1 + jmp .Lsqr4x_sub +.align 16 +.Lsqr4x_sub: + mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] + sbb 16($nptr,$i,8),@ri[2] + mov 32($tptr,$i,8),@ri[0] # tp[i+1] + mov 40($tptr,$i,8),@ri[1] + sbb 24($nptr,$i,8),@ri[3] + mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] + sbb 32($nptr,$i,8),@ri[0] + mov 48($tptr,$i,8),@ri[2] + mov 56($tptr,$i,8),@ri[3] + sbb 40($nptr,$i,8),@ri[1] + lea 4($i),$i # i++ + dec $j # doesn't affect CF! + jnz .Lsqr4x_sub + + mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] + mov 32($tptr,$i,8),@ri[0] # load overflow bit + sbb 16($nptr,$i,8),@ri[2] + mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] + sbb 24($nptr,$i,8),@ri[3] + mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] + + sbb \$0,@ri[0] # handle upmost overflow bit + mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] + xor $i,$i # i=0 + and @ri[0],$tptr + not @ri[0] + mov $rptr,$nptr + and @ri[0],$nptr + lea -1($num),$j + or $nptr,$tptr # tp=borrow?tp:rp + + pxor %xmm0,%xmm0 + lea 64(%rsp,$num,8),$nptr + movdqu ($tptr),%xmm1 + lea ($nptr,$num,8),$nptr + movdqa %xmm0,64(%rsp) # zap lower half of temporary vector + movdqa %xmm0,($nptr) # zap upper half of temporary vector + movdqu %xmm1,($rptr) + jmp .Lsqr4x_copy +.align 16 +.Lsqr4x_copy: # copy or in-place refresh + movdqu 16($tptr,$i),%xmm2 + movdqu 32($tptr,$i),%xmm1 + movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector + movdqa %xmm0,96(%rsp,$i) # zap lower half of temporary vector + movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector + movdqa %xmm0,32($nptr,$i) # zap upper half of temporary vector + movdqu %xmm2,16($rptr,$i) + movdqu %xmm1,32($rptr,$i) + lea 32($i),$i + dec $j + jnz .Lsqr4x_copy + + movdqu 16($tptr,$i),%xmm2 + movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector + movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector + movdqu %xmm2,16($rptr,$i) +___ +} +$code.=<<___; + mov 56(%rsp),%rsi # restore %rsp + mov \$1,%rax + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lsqr4x_epilogue: + ret +.size bn_sqr4x_mont,.-bn_sqr4x_mont +___ +}}} +$code.=<<___; .asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by " .align 16 ___ @@ -228,9 +1512,9 @@ $disp="%r9"; $code.=<<___; .extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent +.type mul_handler,\@abi-omnipotent .align 16 -se_handler: +mul_handler: push %rsi push %rdi push %rbx @@ -245,15 +1529,20 @@ se_handler: mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRsp - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail mov 192($context),%r10 # pull $num mov 8(%rax,%r10,8),%rax # pull saved stack pointer @@ -272,7 +1561,53 @@ se_handler: mov %r14,232($context) # restore context->R14 mov %r15,240($context) # restore context->R15 -.Lin_prologue: + jmp .Lcommon_seh_tail +.size mul_handler,.-mul_handler + +.type sqr_handler,\@abi-omnipotent +.align 16 +sqr_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lsqr4x_body(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lsqr_body + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + lea .Lsqr4x_epilogue(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue + jae .Lcommon_seh_tail + + mov 56(%rax),%rax # pull saved stack pointer + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lcommon_seh_tail: mov 8(%rax),%rdi mov 16(%rax),%rsi mov %rax,152($context) # restore context->Rsp @@ -310,7 +1645,7 @@ se_handler: pop %rdi pop %rsi ret -.size se_handler,.-se_handler +.size sqr_handler,.-sqr_handler .section .pdata .align 4 @@ -318,11 +1653,27 @@ se_handler: .rva .LSEH_end_bn_mul_mont .rva .LSEH_info_bn_mul_mont + .rva .LSEH_begin_bn_mul4x_mont + .rva .LSEH_end_bn_mul4x_mont + .rva .LSEH_info_bn_mul4x_mont + + .rva .LSEH_begin_bn_sqr4x_mont + .rva .LSEH_end_bn_sqr4x_mont + .rva .LSEH_info_bn_sqr4x_mont + .section .xdata .align 8 .LSEH_info_bn_mul_mont: .byte 9,0,0,0 - .rva se_handler + .rva mul_handler + .rva .Lmul_body,.Lmul_epilogue # HandlerData[] +.LSEH_info_bn_mul4x_mont: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmul4x_body,.Lmul4x_epilogue # HandlerData[] +.LSEH_info_bn_sqr4x_mont: + .byte 9,0,0,0 + .rva sqr_handler ___ } diff --git a/app/openssl/crypto/bn/asm/x86_64-mont5.S b/app/openssl/crypto/bn/asm/x86_64-mont5.S new file mode 100644 index 00000000..49ec6ac6 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86_64-mont5.S @@ -0,0 +1,784 @@ +.text + +.globl bn_mul_mont_gather5 +.type bn_mul_mont_gather5,@function +.align 64 +bn_mul_mont_gather5: + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + movl %r9d,%r9d + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%rax + leaq 2(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jl .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.align 16 +.Lcopy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.type bn_mul4x_mont_gather5,@function +.align 16 +bn_mul4x_mont_gather5: +.Lmul4x_enter: + movl %r9d,%r9d + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%rax + leaq 4(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + xorq %r15,%r15 + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-40(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + movq %rdi,-16(%rsp,%r15,8) + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jl .Louter4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp .Lsub4x +.align 16 +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 + decq %r15 + jnz .Lcopy4x + + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 +.globl bn_scatter5 +.type bn_scatter5,@function +.align 16 +bn_scatter5: + cmpq $0,%rsi + jz .Lscatter_epilogue + leaq (%rdx,%rcx,8),%rdx +.Lscatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subq $1,%rsi + jnz .Lscatter +.Lscatter_epilogue: + .byte 0xf3,0xc3 +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.type bn_gather5,@function +.align 16 +bn_gather5: + movq %rcx,%r11 + shrq $3,%rcx + andq $7,%r11 + notq %rcx + leaq .Lmagic_masks(%rip),%rax + andq $3,%rcx + leaq 96(%rdx,%r11,8),%rdx + movq 0(%rax,%rcx,8),%xmm4 + movq 8(%rax,%rcx,8),%xmm5 + movq 16(%rax,%rcx,8),%xmm6 + movq 24(%rax,%rcx,8),%xmm7 + jmp .Lgather +.align 16 +.Lgather: + movq -96(%rdx),%xmm0 + movq -32(%rdx),%xmm1 + pand %xmm4,%xmm0 + movq 32(%rdx),%xmm2 + pand %xmm5,%xmm1 + movq 96(%rdx),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%rdx),%rdx + por %xmm3,%xmm0 + + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subq $1,%rsi + jnz .Lgather + .byte 0xf3,0xc3 +.LSEH_end_bn_gather5: +.size bn_gather5,.-bn_gather5 +.align 64 +.Lmagic_masks: +.long 0,0, 0,0, 0,0, -1,-1 +.long 0,0, 0,0, 0,0, 0,0 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/app/openssl/crypto/bn/asm/x86_64-mont5.pl b/app/openssl/crypto/bn/asm/x86_64-mont5.pl new file mode 100755 index 00000000..dae0fe24 --- /dev/null +++ b/app/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -0,0 +1,1071 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# August 2011. +# +# Companion to x86_64-mont.pl that optimizes cache-timing attack +# countermeasures. The subroutines are produced by replacing bp[i] +# references in their x86_64-mont.pl counterparts with cache-neutral +# references to powers table computed in BN_mod_exp_mont_consttime. +# In addition subroutine that scatters elements of the powers table +# is implemented, so that scatter-/gathering can be tuned without +# bn_exp.c modifications. + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# int bn_mul_mont_gather5( +$rp="%rdi"; # BN_ULONG *rp, +$ap="%rsi"; # const BN_ULONG *ap, +$bp="%rdx"; # const BN_ULONG *bp, +$np="%rcx"; # const BN_ULONG *np, +$n0="%r8"; # const BN_ULONG *n0, +$num="%r9"; # int num, + # int idx); # 0 to 2^5-1, "index" in $bp holding + # pre-computed powers of a', interlaced + # in such manner that b[0] is $bp[idx], + # b[1] is [2^5+idx], etc. +$lo0="%r10"; +$hi0="%r11"; +$hi1="%r13"; +$i="%r14"; +$j="%r15"; +$m0="%rbx"; +$m1="%rbp"; + +$code=<<___; +.text + +.globl bn_mul_mont_gather5 +.type bn_mul_mont_gather5,\@function,6 +.align 64 +bn_mul_mont_gather5: + test \$3,${num}d + jnz .Lmul_enter + cmp \$8,${num}d + jb .Lmul_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + mov ${num}d,${num}d + mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +.Lmul_alloca: +___ +$code.=<<___; + mov %rsp,%rax + lea 2($num),%r11 + neg %r11 + lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) + and \$-1024,%rsp # minimize TLB usage + + mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.Lmul_body: + mov $bp,%r12 # reassign $bp +___ + $bp="%r12"; + $STRIDE=2**5*8; # 5 is "window size" + $N=$STRIDE/4; # should match cache line size +$code.=<<___; + mov %r10,%r11 + shr \$`log($N/8)/log(2)`,%r10 + and \$`$N/8-1`,%r11 + not %r10 + lea .Lmagic_masks(%rip),%rax + and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" + lea 96($bp,%r11,8),$bp # pointer within 1st cache line + movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which + movq 8(%rax,%r10,8),%xmm5 # cache line contains element + movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument + movq 24(%rax,%r10,8),%xmm7 + + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + movq %xmm0,$m0 # m0=bp[0] + + mov ($n0),$n0 # pull n0[0] value + mov ($ap),%rax + + xor $i,$i # i=0 + xor $j,$j # j=0 + + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + + mov $n0,$m1 + mulq $m0 # ap[0]*bp[0] + mov %rax,$lo0 + mov ($np),%rax + + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq $lo0,$m1 # "tp[0]"*n0 + mov %rdx,$hi0 + + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + mulq $m1 # np[0]*m1 + add %rax,$lo0 # discarded + mov 8($ap),%rax + adc \$0,%rdx + mov %rdx,$hi1 + + lea 1($j),$j # j++ + jmp .L1st_enter + +.align 16 +.L1st: + add %rax,$hi1 + mov ($ap,$j,8),%rax + adc \$0,%rdx + add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] + mov $lo0,$hi0 + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + +.L1st_enter: + mulq $m0 # ap[j]*bp[0] + add %rax,$hi0 + mov ($np,$j,8),%rax + adc \$0,%rdx + lea 1($j),$j # j++ + mov %rdx,$lo0 + + mulq $m1 # np[j]*m1 + cmp $num,$j + jne .L1st + + movq %xmm0,$m0 # bp[1] + + add %rax,$hi1 + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + mov $lo0,$hi0 + + xor %rdx,%rdx + add $hi0,$hi1 + adc \$0,%rdx + mov $hi1,-8(%rsp,$num,8) + mov %rdx,(%rsp,$num,8) # store upmost overflow bit + + lea 1($i),$i # i++ + jmp .Louter +.align 16 +.Louter: + xor $j,$j # j=0 + mov $n0,$m1 + mov (%rsp),$lo0 + + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + + mulq $m0 # ap[0]*bp[i] + add %rax,$lo0 # ap[0]*bp[i]+tp[0] + mov ($np),%rax + adc \$0,%rdx + + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq $lo0,$m1 # tp[0]*n0 + mov %rdx,$hi0 + + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + mulq $m1 # np[0]*m1 + add %rax,$lo0 # discarded + mov 8($ap),%rax + adc \$0,%rdx + mov 8(%rsp),$lo0 # tp[1] + mov %rdx,$hi1 + + lea 1($j),$j # j++ + jmp .Linner_enter + +.align 16 +.Linner: + add %rax,$hi1 + mov ($ap,$j,8),%rax + adc \$0,%rdx + add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] + mov (%rsp,$j,8),$lo0 + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + +.Linner_enter: + mulq $m0 # ap[j]*bp[i] + add %rax,$hi0 + mov ($np,$j,8),%rax + adc \$0,%rdx + add $hi0,$lo0 # ap[j]*bp[i]+tp[j] + mov %rdx,$hi0 + adc \$0,$hi0 + lea 1($j),$j # j++ + + mulq $m1 # np[j]*m1 + cmp $num,$j + jne .Linner + + movq %xmm0,$m0 # bp[i+1] + + add %rax,$hi1 + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] + mov (%rsp,$j,8),$lo0 + adc \$0,%rdx + mov $hi1,-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$hi1 + + xor %rdx,%rdx + add $hi0,$hi1 + adc \$0,%rdx + add $lo0,$hi1 # pull upmost overflow bit + adc \$0,%rdx + mov $hi1,-8(%rsp,$num,8) + mov %rdx,(%rsp,$num,8) # store upmost overflow bit + + lea 1($i),$i # i++ + cmp $num,$i + jl .Louter + + xor $i,$i # i=0 and clear CF! + mov (%rsp),%rax # tp[0] + lea (%rsp),$ap # borrow ap for tp + mov $num,$j # j=num + jmp .Lsub +.align 16 +.Lsub: sbb ($np,$i,8),%rax + mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] + mov 8($ap,$i,8),%rax # tp[i+1] + lea 1($i),$i # i++ + dec $j # doesnn't affect CF! + jnz .Lsub + + sbb \$0,%rax # handle upmost overflow bit + xor $i,$i + and %rax,$ap + not %rax + mov $rp,$np + and %rax,$np + mov $num,$j # j=num + or $np,$ap # ap=borrow?tp:rp +.align 16 +.Lcopy: # copy or in-place refresh + mov ($ap,$i,8),%rax + mov $i,(%rsp,$i,8) # zap temporary vector + mov %rax,($rp,$i,8) # rp[i]=tp[i] + lea 1($i),$i + sub \$1,$j + jnz .Lcopy + + mov 8(%rsp,$num,8),%rsi # restore %rsp + mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps (%rsi),%xmm6 + movaps 0x10(%rsi),%xmm7 + lea 0x28(%rsi),%rsi +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lmul_epilogue: + ret +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +___ +{{{ +my @A=("%r10","%r11"); +my @N=("%r13","%rdi"); +$code.=<<___; +.type bn_mul4x_mont_gather5,\@function,6 +.align 16 +bn_mul4x_mont_gather5: +.Lmul4x_enter: + mov ${num}d,${num}d + mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +.Lmul4x_alloca: +___ +$code.=<<___; + mov %rsp,%rax + lea 4($num),%r11 + neg %r11 + lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)) + and \$-1024,%rsp # minimize TLB usage + + mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.Lmul4x_body: + mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp + mov %rdx,%r12 # reassign $bp +___ + $bp="%r12"; + $STRIDE=2**5*8; # 5 is "window size" + $N=$STRIDE/4; # should match cache line size +$code.=<<___; + mov %r10,%r11 + shr \$`log($N/8)/log(2)`,%r10 + and \$`$N/8-1`,%r11 + not %r10 + lea .Lmagic_masks(%rip),%rax + and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" + lea 96($bp,%r11,8),$bp # pointer within 1st cache line + movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which + movq 8(%rax,%r10,8),%xmm5 # cache line contains element + movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument + movq 24(%rax,%r10,8),%xmm7 + + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + movq %xmm0,$m0 # m0=bp[0] + mov ($n0),$n0 # pull n0[0] value + mov ($ap),%rax + + xor $i,$i # i=0 + xor $j,$j # j=0 + + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + + mov $n0,$m1 + mulq $m0 # ap[0]*bp[0] + mov %rax,$A[0] + mov ($np),%rax + + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq $A[0],$m1 # "tp[0]"*n0 + mov %rdx,$A[1] + + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + mulq $m1 # np[0]*m1 + add %rax,$A[0] # discarded + mov 8($ap),%rax + adc \$0,%rdx + mov %rdx,$N[1] + + mulq $m0 + add %rax,$A[1] + mov 8($np),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 + add %rax,$N[1] + mov 16($ap),%rax + adc \$0,%rdx + add $A[1],$N[1] + lea 4($j),$j # j++ + adc \$0,%rdx + mov $N[1],(%rsp) + mov %rdx,$N[0] + jmp .L1st4x +.align 16 +.L1st4x: + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov ($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov 8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-8(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov 8($np,$j,8),%rax + adc \$0,%rdx + lea 4($j),$j # j++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov -16($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + cmp $num,$j + jl .L1st4x + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[0] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + movq %xmm0,$m0 # bp[1] + + xor $N[1],$N[1] + add $A[0],$N[0] + adc \$0,$N[1] + mov $N[0],-8(%rsp,$j,8) + mov $N[1],(%rsp,$j,8) # store upmost overflow bit + + lea 1($i),$i # i++ +.align 4 +.Louter4x: + xor $j,$j # j=0 + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 + + mov (%rsp),$A[0] + mov $n0,$m1 + mulq $m0 # ap[0]*bp[i] + add %rax,$A[0] # ap[0]*bp[i]+tp[0] + mov ($np),%rax + adc \$0,%rdx + + movq `3*$STRIDE/4-96`($bp),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq $A[0],$m1 # tp[0]*n0 + mov %rdx,$A[1] + + por %xmm2,%xmm0 + lea $STRIDE($bp),$bp + por %xmm3,%xmm0 + + mulq $m1 # np[0]*m1 + add %rax,$A[0] # "$N[0]", discarded + mov 8($ap),%rax + adc \$0,%rdx + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov 8($np),%rax + adc \$0,%rdx + add 8(%rsp),$A[1] # +tp[1] + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov 16($ap),%rax + adc \$0,%rdx + add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] + lea 4($j),$j # j+=2 + adc \$0,%rdx + mov %rdx,$N[0] + jmp .Linner4x +.align 16 +.Linner4x: + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + add -8(%rsp,$j,8),$A[1] + adc \$0,%rdx + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov ($np,$j,8),%rax + adc \$0,%rdx + add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov 8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov 8($np,$j,8),%rax + adc \$0,%rdx + add 8(%rsp,$j,8),$A[1] + adc \$0,%rdx + lea 4($j),$j # j++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov -16($ap,$j,8),%rax + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[0],-40(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + cmp $num,$j + jl .Linner4x + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[0] + mov -16($np,$j,8),%rax + adc \$0,%rdx + add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + adc \$0,%rdx + mov %rdx,$A[1] + + mulq $m1 # np[j]*m1 + add %rax,$N[0] + mov -8($ap,$j,8),%rax + adc \$0,%rdx + add $A[0],$N[0] + adc \$0,%rdx + mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[1] + + mulq $m0 # ap[j]*bp[i] + add %rax,$A[1] + mov -8($np,$j,8),%rax + adc \$0,%rdx + add -8(%rsp,$j,8),$A[1] + adc \$0,%rdx + lea 1($i),$i # i++ + mov %rdx,$A[0] + + mulq $m1 # np[j]*m1 + add %rax,$N[1] + mov ($ap),%rax # ap[0] + adc \$0,%rdx + add $A[1],$N[1] + adc \$0,%rdx + mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov %rdx,$N[0] + + movq %xmm0,$m0 # bp[i+1] + mov $N[1],-16(%rsp,$j,8) # tp[j-1] + + xor $N[1],$N[1] + add $A[0],$N[0] + adc \$0,$N[1] + add (%rsp,$num,8),$N[0] # pull upmost overflow bit + adc \$0,$N[1] + mov $N[0],-8(%rsp,$j,8) + mov $N[1],(%rsp,$j,8) # store upmost overflow bit + + cmp $num,$i + jl .Louter4x +___ +{ +my @ri=("%rax","%rdx",$m0,$m1); +$code.=<<___; + mov 16(%rsp,$num,8),$rp # restore $rp + mov 0(%rsp),@ri[0] # tp[0] + pxor %xmm0,%xmm0 + mov 8(%rsp),@ri[1] # tp[1] + shr \$2,$num # num/=4 + lea (%rsp),$ap # borrow ap for tp + xor $i,$i # i=0 and clear CF! + + sub 0($np),@ri[0] + mov 16($ap),@ri[2] # tp[2] + mov 24($ap),@ri[3] # tp[3] + sbb 8($np),@ri[1] + lea -1($num),$j # j=num/4-1 + jmp .Lsub4x +.align 16 +.Lsub4x: + mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 16($np,$i,8),@ri[2] + mov 32($ap,$i,8),@ri[0] # tp[i+1] + mov 40($ap,$i,8),@ri[1] + sbb 24($np,$i,8),@ri[3] + mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] + mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 32($np,$i,8),@ri[0] + mov 48($ap,$i,8),@ri[2] + mov 56($ap,$i,8),@ri[3] + sbb 40($np,$i,8),@ri[1] + lea 4($i),$i # i++ + dec $j # doesnn't affect CF! + jnz .Lsub4x + + mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] + mov 32($ap,$i,8),@ri[0] # load overflow bit + sbb 16($np,$i,8),@ri[2] + mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] + sbb 24($np,$i,8),@ri[3] + mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] + + sbb \$0,@ri[0] # handle upmost overflow bit + mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] + xor $i,$i # i=0 + and @ri[0],$ap + not @ri[0] + mov $rp,$np + and @ri[0],$np + lea -1($num),$j + or $np,$ap # ap=borrow?tp:rp + + movdqu ($ap),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,($rp) + jmp .Lcopy4x +.align 16 +.Lcopy4x: # copy or in-place refresh + movdqu 16($ap,$i),%xmm2 + movdqu 32($ap,$i),%xmm1 + movdqa %xmm0,16(%rsp,$i) + movdqu %xmm2,16($rp,$i) + movdqa %xmm0,32(%rsp,$i) + movdqu %xmm1,32($rp,$i) + lea 32($i),$i + dec $j + jnz .Lcopy4x + + shl \$2,$num + movdqu 16($ap,$i),%xmm2 + movdqa %xmm0,16(%rsp,$i) + movdqu %xmm2,16($rp,$i) +___ +} +$code.=<<___; + mov 8(%rsp,$num,8),%rsi # restore %rsp + mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps (%rsi),%xmm6 + movaps 0x10(%rsi),%xmm7 + lea 0x28(%rsi),%rsi +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lmul4x_epilogue: + ret +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 +___ +}}} + +{ +my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order +my $out=$inp; +my $STRIDE=2**5*8; +my $N=$STRIDE/4; + +$code.=<<___; +.globl bn_scatter5 +.type bn_scatter5,\@abi-omnipotent +.align 16 +bn_scatter5: + cmp \$0, $num + jz .Lscatter_epilogue + lea ($tbl,$idx,8),$tbl +.Lscatter: + mov ($inp),%rax + lea 8($inp),$inp + mov %rax,($tbl) + lea 32*8($tbl),$tbl + sub \$1,$num + jnz .Lscatter +.Lscatter_epilogue: + ret +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.type bn_gather5,\@abi-omnipotent +.align 16 +bn_gather5: +___ +$code.=<<___ if ($win64); +.LSEH_begin_bn_gather5: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp + .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) + .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) +___ +$code.=<<___; + mov $idx,%r11 + shr \$`log($N/8)/log(2)`,$idx + and \$`$N/8-1`,%r11 + not $idx + lea .Lmagic_masks(%rip),%rax + and \$`2**5/($N/8)-1`,$idx # 5 is "window size" + lea 96($tbl,%r11,8),$tbl # pointer within 1st cache line + movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which + movq 8(%rax,$idx,8),%xmm5 # cache line contains element + movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument + movq 24(%rax,$idx,8),%xmm7 + jmp .Lgather +.align 16 +.Lgather: + movq `0*$STRIDE/4-96`($tbl),%xmm0 + movq `1*$STRIDE/4-96`($tbl),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($tbl),%xmm2 + pand %xmm5,%xmm1 + movq `3*$STRIDE/4-96`($tbl),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + lea $STRIDE($tbl),$tbl + por %xmm3,%xmm0 + + movq %xmm0,($out) # m0=bp[0] + lea 8($out),$out + sub \$1,$num + jnz .Lgather +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + lea 0x28(%rsp),%rsp +___ +$code.=<<___; + ret +.LSEH_end_bn_gather5: +.size bn_gather5,.-bn_gather5 +___ +} +$code.=<<___; +.align 64 +.Lmagic_masks: + .long 0,0, 0,0, 0,0, -1,-1 + .long 0,0, 0,0, 0,0, 0,0 +.asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type mul_handler,\@abi-omnipotent +.align 16 +mul_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRipRsp + + mov 8(%r11),%r10d # HandlerData[2] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 192($context),%r10 # pull $num + mov 8(%rax,%r10,8),%rax # pull saved stack pointer + + movaps (%rax),%xmm0 + movaps 16(%rax),%xmm1 + lea `40+48`(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + movups %xmm0,512($context) # restore context->Xmm6 + movups %xmm1,528($context) # restore context->Xmm7 + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size mul_handler,.-mul_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_bn_mul_mont_gather5 + .rva .LSEH_end_bn_mul_mont_gather5 + .rva .LSEH_info_bn_mul_mont_gather5 + + .rva .LSEH_begin_bn_mul4x_mont_gather5 + .rva .LSEH_end_bn_mul4x_mont_gather5 + .rva .LSEH_info_bn_mul4x_mont_gather5 + + .rva .LSEH_begin_bn_gather5 + .rva .LSEH_end_bn_gather5 + .rva .LSEH_info_bn_gather5 + +.section .xdata +.align 8 +.LSEH_info_bn_mul_mont_gather5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_mul4x_mont_gather5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_gather5: + .byte 0x01,0x0d,0x05,0x00 + .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 + .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 + .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28 +.align 8 +___ +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; +close STDOUT; diff --git a/app/openssl/crypto/bn/bn.h b/app/openssl/crypto/bn/bn.h index a0bc4783..e776c07a 100644 --- a/app/openssl/crypto/bn/bn.h +++ b/app/openssl/crypto/bn/bn.h @@ -538,6 +538,8 @@ BIGNUM *BN_mod_inverse(BIGNUM *ret, BIGNUM *BN_mod_sqrt(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); +void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords); + /* Deprecated versions */ #ifndef OPENSSL_NO_DEPRECATED BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe, @@ -558,6 +560,17 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, int do_trial_division, BN_GENCB *cb); +int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); + +int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, + const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, + const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); +int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, + BIGNUM *Xp1, BIGNUM *Xp2, + const BIGNUM *Xp, + const BIGNUM *e, BN_CTX *ctx, + BN_GENCB *cb); + BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, @@ -612,6 +625,8 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_RECP_CTX *recp, BN_CTX *ctx); +#ifndef OPENSSL_NO_EC2M + /* Functions for arithmetic over binary polynomials represented by BIGNUMs. * * The BIGNUM::neg property of BIGNUMs representing binary polynomials is @@ -663,6 +678,8 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); int BN_GF2m_arr2poly(const int p[], BIGNUM *a); +#endif + /* faster mod functions for the 'NIST primes' * 0 <= a < p^2 */ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); @@ -677,6 +694,10 @@ const BIGNUM *BN_get0_nist_prime_256(void); const BIGNUM *BN_get0_nist_prime_384(void); const BIGNUM *BN_get0_nist_prime_521(void); +int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv, + const unsigned char *message, size_t message_len, + BN_CTX *ctx); + /* library internal functions */ #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ @@ -759,11 +780,20 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); #define bn_fix_top(a) bn_check_top(a) +#define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) +#define bn_wcheck_size(bn, words) \ + do { \ + const BIGNUM *_bnum2 = (bn); \ + assert(words <= (_bnum2)->dmax && words >= (_bnum2)->top); \ + } while(0) + #else /* !BN_DEBUG */ #define bn_pollute(a) #define bn_check_top(a) #define bn_fix_top(a) bn_correct_top(a) +#define bn_check_size(bn, bits) +#define bn_wcheck_size(bn, words) #endif @@ -827,6 +857,7 @@ void ERR_load_BN_strings(void); #define BN_F_BN_EXP 123 #define BN_F_BN_EXPAND2 108 #define BN_F_BN_EXPAND_INTERNAL 120 +#define BN_F_BN_GENERATE_DSA_NONCE 140 #define BN_F_BN_GF2M_MOD 131 #define BN_F_BN_GF2M_MOD_EXP 132 #define BN_F_BN_GF2M_MOD_MUL 133 @@ -866,6 +897,7 @@ void ERR_load_BN_strings(void); #define BN_R_NOT_INITIALIZED 107 #define BN_R_NO_INVERSE 108 #define BN_R_NO_SOLUTION 116 +#define BN_R_PRIVATE_KEY_TOO_LARGE 117 #define BN_R_P_IS_NOT_PRIME 112 #define BN_R_TOO_MANY_ITERATIONS 113 #define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 diff --git a/app/openssl/crypto/bn/bn_blind.c b/app/openssl/crypto/bn/bn_blind.c index e060592f..9ed8bc2b 100644 --- a/app/openssl/crypto/bn/bn_blind.c +++ b/app/openssl/crypto/bn/bn_blind.c @@ -126,7 +126,7 @@ struct bn_blinding_st * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ #endif CRYPTO_THREADID tid; - unsigned int counter; + int counter; unsigned long flags; BN_MONT_CTX *m_ctx; int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, @@ -160,7 +160,10 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) BN_set_flags(ret->mod, BN_FLG_CONSTTIME); - ret->counter = BN_BLINDING_COUNTER; + /* Set the counter to the special value -1 + * to indicate that this is never-used fresh blinding + * that does not need updating before first use. */ + ret->counter = -1; CRYPTO_THREADID_current(&ret->tid); return(ret); err: @@ -190,7 +193,10 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) goto err; } - if (--(b->counter) == 0 && b->e != NULL && + if (b->counter == -1) + b->counter = 0; + + if (++b->counter == BN_BLINDING_COUNTER && b->e != NULL && !(b->flags & BN_BLINDING_NO_RECREATE)) { /* re-create blinding parameters */ @@ -205,8 +211,8 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) ret=1; err: - if (b->counter == 0) - b->counter = BN_BLINDING_COUNTER; + if (b->counter == BN_BLINDING_COUNTER) + b->counter = 0; return(ret); } @@ -227,6 +233,12 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx) return(0); } + if (b->counter == -1) + /* Fresh blinding, doesn't need updating. */ + b->counter = 0; + else if (!BN_BLINDING_update(b,ctx)) + return(0); + if (r != NULL) { if (!BN_copy(r, b->Ai)) ret=0; @@ -247,22 +259,19 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct int ret; bn_check_top(n); - if ((b->A == NULL) || (b->Ai == NULL)) - { - BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED); - return(0); - } if (r != NULL) ret = BN_mod_mul(n, n, r, b->mod, ctx); else - ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx); - - if (ret >= 0) { - if (!BN_BLINDING_update(b,ctx)) + if (b->Ai == NULL) + { + BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED); return(0); + } + ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx); } + bn_check_top(n); return(ret); } diff --git a/app/openssl/crypto/bn/bn_div.c b/app/openssl/crypto/bn/bn_div.c index 802a43d6..7b240318 100644 --- a/app/openssl/crypto/bn/bn_div.c +++ b/app/openssl/crypto/bn/bn_div.c @@ -141,6 +141,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, * * */ +#undef bn_div_words # define bn_div_words(n0,n1,d0) \ ({ asm volatile ( \ "divl %4" \ @@ -155,6 +156,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, * Same story here, but it's 128-bit by 64-bit division. Wow! * */ +# undef bn_div_words # define bn_div_words(n0,n1,d0) \ ({ asm volatile ( \ "divq %4" \ @@ -169,15 +171,13 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, #endif /* OPENSSL_NO_ASM */ -/* BN_div[_no_branch] computes dv := num / divisor, rounding towards +/* BN_div computes dv := num / divisor, rounding towards * zero, and sets up rm such that dv*divisor + rm = num holds. * Thus: * dv->neg == num->neg ^ divisor->neg (unless the result is zero) * rm->neg == num->neg (unless the remainder is zero) * If 'dv' or 'rm' is NULL, the respective value is not returned. */ -static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, - const BIGNUM *divisor, BN_CTX *ctx); int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_CTX *ctx) { @@ -186,6 +186,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_ULONG *resp,*wnump; BN_ULONG d0,d1; int num_n,div_n; + int no_branch=0; /* Invalid zero-padding would have particularly bad consequences * in the case of 'num', so don't just rely on bn_check_top() for this one @@ -200,7 +201,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) { - return BN_div_no_branch(dv, rm, num, divisor, ctx); + no_branch=1; } bn_check_top(dv); @@ -214,7 +215,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, return(0); } - if (BN_ucmp(num,divisor) < 0) + if (!no_branch && BN_ucmp(num,divisor) < 0) { if (rm != NULL) { if (BN_copy(rm,num) == NULL) return(0); } @@ -239,242 +240,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, norm_shift+=BN_BITS2; if (!(BN_lshift(snum,num,norm_shift))) goto err; snum->neg=0; - div_n=sdiv->top; - num_n=snum->top; - loop=num_n-div_n; - /* Lets setup a 'window' into snum - * This is the part that corresponds to the current - * 'area' being divided */ - wnum.neg = 0; - wnum.d = &(snum->d[loop]); - wnum.top = div_n; - /* only needed when BN_ucmp messes up the values between top and max */ - wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ - - /* Get the top 2 words of sdiv */ - /* div_n=sdiv->top; */ - d0=sdiv->d[div_n-1]; - d1=(div_n == 1)?0:sdiv->d[div_n-2]; - - /* pointer to the 'top' of snum */ - wnump= &(snum->d[num_n-1]); - - /* Setup to 'res' */ - res->neg= (num->neg^divisor->neg); - if (!bn_wexpand(res,(loop+1))) goto err; - res->top=loop; - resp= &(res->d[loop-1]); - - /* space for temp */ - if (!bn_wexpand(tmp,(div_n+1))) goto err; - if (BN_ucmp(&wnum,sdiv) >= 0) + if (no_branch) { - /* If BN_DEBUG_RAND is defined BN_ucmp changes (via - * bn_pollute) the const bignum arguments => - * clean the values between top and max again */ - bn_clear_top2max(&wnum); - bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); - *resp=1; - } - else - res->top--; - /* if res->top == 0 then clear the neg value otherwise decrease - * the resp pointer */ - if (res->top == 0) - res->neg = 0; - else - resp--; - - for (i=0; i 0x%08X\n", - n0, n1, d0, q); -#endif -#endif - -#ifndef REMAINDER_IS_ALREADY_CALCULATED - /* - * rem doesn't have to be BN_ULLONG. The least we - * know it's less that d0, isn't it? - */ - rem=(n1-q*d0)&BN_MASK2; -#endif - t2=(BN_ULLONG)d1*q; - - for (;;) - { - if (t2 <= ((((BN_ULLONG)rem)< 0x%08X\n", - n0, n1, d0, q); -#endif -#ifndef REMAINDER_IS_ALREADY_CALCULATED - rem=(n1-q*d0)&BN_MASK2; -#endif - -#if defined(BN_UMULT_LOHI) - BN_UMULT_LOHI(t2l,t2h,d1,q); -#elif defined(BN_UMULT_HIGH) - t2l = d1 * q; - t2h = BN_UMULT_HIGH(d1,q); -#else + /* Since we don't know whether snum is larger than sdiv, + * we pad snum with enough zeroes without changing its + * value. + */ + if (snum->top <= sdiv->top+1) { - BN_ULONG ql, qh; - t2l=LBITS(d1); t2h=HBITS(d1); - ql =LBITS(q); qh =HBITS(q); - mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ + if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err; + for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0; + snum->top = sdiv->top + 2; } -#endif - - for (;;) - { - if ((t2h < rem) || - ((t2h == rem) && (t2l <= wnump[-2]))) - break; - q--; - rem += d0; - if (rem < d0) break; /* don't let rem overflow */ - if (t2l < d1) t2h--; t2l -= d1; - } -#endif /* !BN_LLONG */ - } -#endif /* !BN_DIV3W */ - - l0=bn_mul_words(tmp->d,sdiv->d,div_n,q); - tmp->d[div_n]=l0; - wnum.d--; - /* ingore top values of the bignums just sub the two - * BN_ULONG arrays with bn_sub_words */ - if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1)) + else { - /* Note: As we have considered only the leading - * two BN_ULONGs in the calculation of q, sdiv * q - * might be greater than wnum (but then (q-1) * sdiv - * is less or equal than wnum) - */ - q--; - if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) - /* we can't have an overflow here (assuming - * that q != 0, but if q == 0 then tmp is - * zero anyway) */ - (*wnump)++; + if (bn_wexpand(snum, snum->top + 1) == NULL) goto err; + snum->d[snum->top] = 0; + snum->top ++; } - /* store part of the result */ - *resp = q; - } - bn_correct_top(snum); - if (rm != NULL) - { - /* Keep a copy of the neg flag in num because if rm==num - * BN_rshift() will overwrite it. - */ - int neg = num->neg; - BN_rshift(rm,snum,norm_shift); - if (!BN_is_zero(rm)) - rm->neg = neg; - bn_check_top(rm); - } - BN_CTX_end(ctx); - return(1); -err: - bn_check_top(rm); - BN_CTX_end(ctx); - return(0); - } - - -/* BN_div_no_branch is a special version of BN_div. It does not contain - * branches that may leak sensitive information. - */ -static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, - const BIGNUM *divisor, BN_CTX *ctx) - { - int norm_shift,i,loop; - BIGNUM *tmp,wnum,*snum,*sdiv,*res; - BN_ULONG *resp,*wnump; - BN_ULONG d0,d1; - int num_n,div_n; - - bn_check_top(dv); - bn_check_top(rm); - /* bn_check_top(num); */ /* 'num' has been checked in BN_div() */ - bn_check_top(divisor); - - if (BN_is_zero(divisor)) - { - BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO); - return(0); - } - - BN_CTX_start(ctx); - tmp=BN_CTX_get(ctx); - snum=BN_CTX_get(ctx); - sdiv=BN_CTX_get(ctx); - if (dv == NULL) - res=BN_CTX_get(ctx); - else res=dv; - if (sdiv == NULL || res == NULL) goto err; - - /* First we normalise the numbers */ - norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); - if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err; - sdiv->neg=0; - norm_shift+=BN_BITS2; - if (!(BN_lshift(snum,num,norm_shift))) goto err; - snum->neg=0; - - /* Since we don't know whether snum is larger than sdiv, - * we pad snum with enough zeroes without changing its - * value. - */ - if (snum->top <= sdiv->top+1) - { - if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err; - for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0; - snum->top = sdiv->top + 2; - } - else - { - if (bn_wexpand(snum, snum->top + 1) == NULL) goto err; - snum->d[snum->top] = 0; - snum->top ++; } div_n=sdiv->top; @@ -500,12 +284,27 @@ static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, /* Setup to 'res' */ res->neg= (num->neg^divisor->neg); if (!bn_wexpand(res,(loop+1))) goto err; - res->top=loop-1; + res->top=loop-no_branch; resp= &(res->d[loop-1]); /* space for temp */ if (!bn_wexpand(tmp,(div_n+1))) goto err; + if (!no_branch) + { + if (BN_ucmp(&wnum,sdiv) >= 0) + { + /* If BN_DEBUG_RAND is defined BN_ucmp changes (via + * bn_pollute) the const bignum arguments => + * clean the values between top and max again */ + bn_clear_top2max(&wnum); + bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); + *resp=1; + } + else + res->top--; + } + /* if res->top == 0 then clear the neg value otherwise decrease * the resp pointer */ if (res->top == 0) @@ -638,7 +437,7 @@ X) -> 0x%08X\n", rm->neg = neg; bn_check_top(rm); } - bn_correct_top(res); + if (no_branch) bn_correct_top(res); BN_CTX_end(ctx); return(1); err: @@ -646,5 +445,4 @@ err: BN_CTX_end(ctx); return(0); } - #endif diff --git a/app/openssl/crypto/bn/bn_err.c b/app/openssl/crypto/bn/bn_err.c index cfe2eb94..f722b525 100644 --- a/app/openssl/crypto/bn/bn_err.c +++ b/app/openssl/crypto/bn/bn_err.c @@ -87,6 +87,7 @@ static ERR_STRING_DATA BN_str_functs[]= {ERR_FUNC(BN_F_BN_EXP), "BN_exp"}, {ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"}, {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"}, +{ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"}, {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"}, {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"}, {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"}, @@ -129,6 +130,7 @@ static ERR_STRING_DATA BN_str_reasons[]= {ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"}, {ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"}, {ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"}, +{ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE) ,"private key too large"}, {ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"}, {ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"}, {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"}, diff --git a/app/openssl/crypto/bn/bn_exp.c b/app/openssl/crypto/bn/bn_exp.c index d9b6c737..2abf6fd6 100644 --- a/app/openssl/crypto/bn/bn_exp.c +++ b/app/openssl/crypto/bn/bn_exp.c @@ -113,6 +113,18 @@ #include "cryptlib.h" #include "bn_lcl.h" +#include +#ifdef _WIN32 +# include +# ifndef alloca +# define alloca _alloca +# endif +#elif defined(__GNUC__) +# ifndef alloca +# define alloca(s) __builtin_alloca((s)) +# endif +#endif + /* maximum precomputation table size for *variable* sliding windows */ #define TABLE_SIZE 32 @@ -522,23 +534,17 @@ err: * as cache lines are concerned. The following functions are used to transfer a BIGNUM * from/to that table. */ -static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) +static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) { size_t i, j; - if (bn_wexpand(b, top) == NULL) - return 0; - while (b->top < top) - { - b->d[b->top++] = 0; - } - + if (top > b->top) + top = b->top; /* this works because 'buf' is explicitly zeroed */ for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) { buf[j] = ((unsigned char*)b->d)[i]; } - bn_correct_top(b); return 1; } @@ -561,7 +567,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf /* Given a pointer value, compute the next address that is a cache line multiple. */ #define MOD_EXP_CTIME_ALIGN(x_) \ - ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) + ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) /* This variant of BN_mod_exp_mont() uses fixed windows and the special * precomputation memory layout to limit data-dependency to a minimum @@ -572,17 +578,15 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i,bits,ret=0,idx,window,wvalue; + int i,bits,ret=0,window,wvalue; int top; - BIGNUM *r; - const BIGNUM *aa; BN_MONT_CTX *mont=NULL; int numPowers; unsigned char *powerbufFree=NULL; int powerbufLen = 0; unsigned char *powerbuf=NULL; - BIGNUM *computeTemp=NULL, *am=NULL; + BIGNUM tmp, am; bn_check_top(a); bn_check_top(p); @@ -602,10 +606,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, return ret; } - /* Initialize BIGNUM context and allocate intermediate result */ BN_CTX_start(ctx); - r = BN_CTX_get(ctx); - if (r == NULL) goto err; /* Allocate a montgomery context if it was not supplied by the caller. * If this is not done, things will break in the montgomery part. @@ -620,40 +621,154 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, /* Get the window size to use with size of p. */ window = BN_window_bits_for_ctime_exponent_size(bits); +#if defined(OPENSSL_BN_ASM_MONT5) + if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ +#endif /* Allocate a buffer large enough to hold all of the pre-computed - * powers of a. + * powers of am, am itself and tmp. */ numPowers = 1 << window; - powerbufLen = sizeof(m->d[0])*top*numPowers; + powerbufLen = sizeof(m->d[0])*(top*numPowers + + ((2*top)>numPowers?(2*top):numPowers)); +#ifdef alloca + if (powerbufLen < 3072) + powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); + else +#endif if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) goto err; powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); memset(powerbuf, 0, powerbufLen); - /* Initialize the intermediate result. Do this early to save double conversion, - * once each for a^0 and intermediate result. - */ - if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err; +#ifdef alloca + if (powerbufLen < 3072) + powerbufFree = NULL; +#endif - /* Initialize computeTemp as a^1 with montgomery precalcs */ - computeTemp = BN_CTX_get(ctx); - am = BN_CTX_get(ctx); - if (computeTemp==NULL || am==NULL) goto err; + /* lay down tmp and am right after powers table */ + tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers); + am.d = tmp.d + top; + tmp.top = am.top = 0; + tmp.dmax = am.dmax = top; + tmp.neg = am.neg = 0; + tmp.flags = am.flags = BN_FLG_STATIC_DATA; + + /* prepare a^0 in Montgomery domain */ +#if 1 + if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; +#else + tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ + for (i=1;id[i])&BN_MASK2; + tmp.top = top; +#endif + /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a,m) >= 0) { - if (!BN_mod(am,a,m,ctx)) - goto err; - aa= am; + if (!BN_mod(&am,a,m,ctx)) goto err; + if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err; } - else - aa=a; - if (!BN_to_montgomery(am,aa,mont,ctx)) goto err; - if (!BN_copy(computeTemp, am)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err; + else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; + +#if defined(OPENSSL_BN_ASM_MONT5) + /* This optimization uses ideas from http://eprint.iacr.org/2011/239, + * specifically optimization of cache-timing attack countermeasures + * and pre-computation optimization. */ + + /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as + * 512-bit RSA is hardly relevant, we omit it to spare size... */ + if (window==5) + { + void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap, + const void *table,const BN_ULONG *np, + const BN_ULONG *n0,int num,int power); + void bn_scatter5(const BN_ULONG *inp,size_t num, + void *table,size_t power); + void bn_gather5(BN_ULONG *out,size_t num, + void *table,size_t power); + + BN_ULONG *np=mont->N.d, *n0=mont->n0; + + /* BN_to_montgomery can contaminate words above .top + * [in BN_DEBUG[_DEBUG] build]... */ + for (i=am.top; i=0; i--,bits--) + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); + bn_gather5(tmp.d,top,powerbuf,wvalue); + + /* Scan the exponent one window at a time starting from the most + * significant bits. + */ + while (bits >= 0) + { + for (wvalue=0, i=0; i<5; i++,bits--) + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); + + bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); + bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); + bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); + bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); + bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); + bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); + } + + tmp.top=top; + bn_correct_top(&tmp); + } + else +#endif + { + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; /* If the window size is greater than 1, then calculate * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) @@ -662,62 +777,54 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, */ if (window > 1) { - for (i=2; i= 0) + bits--; + for (wvalue=0, i=bits%window; i>=0; i--,bits--) + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; + + /* Scan the exponent one window at a time starting from the most + * significant bits. + */ + while (bits >= 0) { wvalue=0; /* The 'value' of the window */ /* Scan the window, squaring the result as we go */ - for (i=0; i> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] #endif +#if !defined(OPENSSL_BN_ASM_GF2m) /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, * result is a polynomial r with degree < 2 * BN_BITS - 1 * The caller MUST ensure that the variables have the right amount @@ -216,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ } - +#else +void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); +#endif /* Add polynomials a and b and store result in r; r could be a or b, a and b * could be equal; r is the bitwise XOR of a and b. @@ -360,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) { int ret = 0; - const int max = BN_num_bits(p) + 1; - int *arr=NULL; + int arr[6]; bn_check_top(a); bn_check_top(p); - if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; - ret = BN_GF2m_poly2arr(p, arr, max); - if (!ret || ret > max) + ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0])); + if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0]))) { BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH); - goto err; + return 0; } ret = BN_GF2m_mod_arr(r, a, arr); bn_check_top(r); -err: - if (arr) OPENSSL_free(arr); return ret; } @@ -521,7 +522,7 @@ err: */ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { - BIGNUM *b, *c, *u, *v, *tmp; + BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp; int ret = 0; bn_check_top(a); @@ -529,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) BN_CTX_start(ctx); - b = BN_CTX_get(ctx); - c = BN_CTX_get(ctx); - u = BN_CTX_get(ctx); - v = BN_CTX_get(ctx); - if (v == NULL) goto err; + if ((b = BN_CTX_get(ctx))==NULL) goto err; + if ((c = BN_CTX_get(ctx))==NULL) goto err; + if ((u = BN_CTX_get(ctx))==NULL) goto err; + if ((v = BN_CTX_get(ctx))==NULL) goto err; - if (!BN_one(b)) goto err; if (!BN_GF2m_mod(u, a, p)) goto err; - if (!BN_copy(v, p)) goto err; - if (BN_is_zero(u)) goto err; + if (!BN_copy(v, p)) goto err; +#if 0 + if (!BN_one(b)) goto err; + while (1) { while (!BN_is_odd(u)) @@ -565,13 +566,89 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) if (!BN_GF2m_add(u, u, v)) goto err; if (!BN_GF2m_add(b, b, c)) goto err; } +#else + { + int i, ubits = BN_num_bits(u), + vbits = BN_num_bits(v), /* v is copy of p */ + top = p->top; + BN_ULONG *udp,*bdp,*vdp,*cdp; + + bn_wexpand(u,top); udp = u->d; + for (i=u->top;itop = top; + bn_wexpand(b,top); bdp = b->d; + bdp[0] = 1; + for (i=1;itop = top; + bn_wexpand(c,top); cdp = c->d; + for (i=0;itop = top; + vdp = v->d; /* It pays off to "cache" *->d pointers, because + * it allows optimizer to be more aggressive. + * But we don't have to "cache" p->d, because *p + * is declared 'const'... */ + while (1) + { + while (ubits && !(udp[0]&1)) + { + BN_ULONG u0,u1,b0,b1,mask; + + u0 = udp[0]; + b0 = bdp[0]; + mask = (BN_ULONG)0-(b0&1); + b0 ^= p->d[0]&mask; + for (i=0;i>1)|(u1<<(BN_BITS2-1)))&BN_MASK2; + u0 = u1; + b1 = bdp[i+1]^(p->d[i+1]&mask); + bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2; + b0 = b1; + } + udp[i] = u0>>1; + bdp[i] = b0>>1; + ubits--; + } + if (ubits<=BN_BITS2 && udp[0]==1) break; + + if (ubitsd; + bdp = cdp; cdp = c->d; + } + for(i=0;i # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) -# elif defined(__GNUC__) +# elif defined(__GNUC__) && __GNUC__>=2 # define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("umulh %1,%2,%0" \ @@ -247,7 +247,7 @@ extern "C" { ret; }) # endif /* compiler */ # elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) -# if defined(__GNUC__) +# if defined(__GNUC__) && __GNUC__>=2 # define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("mulhdu %0,%1,%2" \ @@ -257,7 +257,7 @@ extern "C" { # endif /* compiler */ # elif (defined(__x86_64) || defined(__x86_64__)) && \ (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) -# if defined(__GNUC__) +# if defined(__GNUC__) && __GNUC__>=2 # define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret,discard; \ asm ("mulq %3" \ @@ -280,6 +280,26 @@ extern "C" { # define BN_UMULT_HIGH(a,b) __umulh((a),(b)) # define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) # endif +# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) +# if defined(__GNUC__) && __GNUC__>=2 +# if __GNUC__>=4 && __GNUC_MINOR__>=4 /* "h" constraint is no more since 4.4 */ +# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) +# define BN_UMULT_LOHI(low,high,a,b) ({ \ + __uint128_t ret=(__uint128_t)(a)*(b); \ + (high)=ret>>64; (low)=ret; }) +# else +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("dmultu %1,%2" \ + : "=h"(ret) \ + : "r"(a), "r"(b) : "l"); \ + ret; }) +# define BN_UMULT_LOHI(low,high,a,b)\ + asm ("dmultu %2,%3" \ + : "=l"(low),"=h"(high) \ + : "r"(a), "r"(b)); +# endif +# endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ @@ -459,6 +479,10 @@ extern "C" { } #endif /* !BN_LLONG */ +#if defined(OPENSSL_DOING_MAKEDEPEND) && defined(OPENSSL_FIPS) +#undef bn_div_words +#endif + void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb); void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); diff --git a/app/openssl/crypto/bn/bn_lib.c b/app/openssl/crypto/bn/bn_lib.c index 5470fbe6..5461e6ee 100644 --- a/app/openssl/crypto/bn/bn_lib.c +++ b/app/openssl/crypto/bn/bn_lib.c @@ -139,25 +139,6 @@ const BIGNUM *BN_value_one(void) return(&const_one); } -char *BN_options(void) - { - static int init=0; - static char data[16]; - - if (!init) - { - init++; -#ifdef BN_LLONG - BIO_snprintf(data,sizeof data,"bn(%d,%d)", - (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); -#else - BIO_snprintf(data,sizeof data,"bn(%d,%d)", - (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); -#endif - } - return(data); - } - int BN_num_bits_word(BN_ULONG l) { static const unsigned char bits[256]={ @@ -843,3 +824,55 @@ int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, } return bn_cmp_words(a,b,cl); } + +/* + * Constant-time conditional swap of a and b. + * a and b are swapped if condition is not 0. The code assumes that at most one bit of condition is set. + * nwords is the number of words to swap. The code assumes that at least nwords are allocated in both a and b, + * and that no more than nwords are used by either a or b. + * a and b cannot be the same number + */ +void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) + { + BN_ULONG t; + int i; + + bn_wcheck_size(a, nwords); + bn_wcheck_size(b, nwords); + + assert(a != b); + assert((condition & (condition - 1)) == 0); + assert(sizeof(BN_ULONG) >= sizeof(int)); + + condition = ((condition - 1) >> (BN_BITS2 - 1)) - 1; + + t = (a->top^b->top) & condition; + a->top ^= t; + b->top ^= t; + +#define BN_CONSTTIME_SWAP(ind) \ + do { \ + t = (a->d[ind] ^ b->d[ind]) & condition; \ + a->d[ind] ^= t; \ + b->d[ind] ^= t; \ + } while (0) + + + switch (nwords) { + default: + for (i = 10; i < nwords; i++) + BN_CONSTTIME_SWAP(i); + /* Fallthrough */ + case 10: BN_CONSTTIME_SWAP(9); /* Fallthrough */ + case 9: BN_CONSTTIME_SWAP(8); /* Fallthrough */ + case 8: BN_CONSTTIME_SWAP(7); /* Fallthrough */ + case 7: BN_CONSTTIME_SWAP(6); /* Fallthrough */ + case 6: BN_CONSTTIME_SWAP(5); /* Fallthrough */ + case 5: BN_CONSTTIME_SWAP(4); /* Fallthrough */ + case 4: BN_CONSTTIME_SWAP(3); /* Fallthrough */ + case 3: BN_CONSTTIME_SWAP(2); /* Fallthrough */ + case 2: BN_CONSTTIME_SWAP(1); /* Fallthrough */ + case 1: BN_CONSTTIME_SWAP(0); + } +#undef BN_CONSTTIME_SWAP +} diff --git a/app/openssl/crypto/bn/bn_mont.c b/app/openssl/crypto/bn/bn_mont.c index 1a866880..427b5cf4 100644 --- a/app/openssl/crypto/bn/bn_mont.c +++ b/app/openssl/crypto/bn/bn_mont.c @@ -177,31 +177,26 @@ err: static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; - BN_ULONG *ap,*np,*rp,n0,v,*nrp; - int al,nl,max,i,x,ri; + BN_ULONG *ap,*np,*rp,n0,v,carry; + int nl,max,i; n= &(mont->N); - /* mont->ri is the size of mont->N in bits (rounded up - to the word size) */ - al=ri=mont->ri/BN_BITS2; - nl=n->top; - if ((al == 0) || (nl == 0)) { ret->top=0; return(1); } + if (nl == 0) { ret->top=0; return(1); } - max=(nl+al+1); /* allow for overflow (no?) XXX */ + max=(2*nl); /* carry is stored separately */ if (bn_wexpand(r,max) == NULL) return(0); r->neg^=n->neg; np=n->d; rp=r->d; - nrp= &(r->d[nl]); /* clear the top words of T */ #if 1 for (i=r->top; id[i]=0; + rp[i]=0; #else - memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); + memset(&(rp[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); #endif r->top=max; @@ -210,7 +205,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) #ifdef BN_COUNT fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); #endif - for (i=0; i= v) - continue; - else - { - if (((++nrp[0])&BN_MASK2) != 0) continue; - if (((++nrp[1])&BN_MASK2) != 0) continue; - for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; - } - } - bn_correct_top(r); - - /* mont->ri will be a multiple of the word size and below code - * is kind of BN_rshift(ret,r,mont->ri) equivalent */ - if (r->top <= ri) - { - ret->top=0; - return(1); + v = (v+carry+rp[nl])&BN_MASK2; + carry |= (v != rp[nl]); + carry &= (v <= rp[nl]); + rp[nl]=v; } - al=r->top-ri; -#define BRANCH_FREE 1 -#if BRANCH_FREE - if (bn_wexpand(ret,ri) == NULL) return(0); - x=0-(((al-ri)>>(sizeof(al)*8-1))&1); - ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ + if (bn_wexpand(ret,nl) == NULL) return(0); + ret->top=nl; ret->neg=r->neg; rp=ret->d; - ap=&(r->d[ri]); + ap=&(r->d[nl]); +#define BRANCH_FREE 1 +#if BRANCH_FREE { - size_t m1,m2; - - v=bn_sub_words(rp,ap,np,ri); - /* this ----------------^^ works even in alri) nrp=rp; else nrp=ap; */ - /* in other words if subtraction result is real, then + v=bn_sub_words(rp,ap,np,nl)-carry; + /* if subtraction result is real, then * trick unconditional memcpy below to perform in-place * "refresh" instead of actual copy. */ - m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al>(sizeof(al)*8-1))&1); /* al>ri */ - m1|=m2; /* (al!=ri) */ - m1|=(0-(size_t)v); /* (al!=ri || v) */ - m1&=~m2; /* (al!=ri || v) && !al>ri */ - nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m1)|((PTR_SIZE_INT)ap&m1)); - } + m=(0-(size_t)v); + nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m)|((PTR_SIZE_INT)ap&m)); - /* 'itop=al; - ret->neg=r->neg; - - rp=ret->d; - ap=&(r->d[ri]); - al-=4; - for (i=0; iN)) >= 0) - { - if (!BN_usub(ret,ret,&(mont->N))) return(0); - } + if (bn_sub_words (rp,ap,np,nl)-carry) + memcpy(rp,ap,nl*sizeof(BN_ULONG)); #endif + bn_correct_top(r); + bn_correct_top(ret); bn_check_top(ret); return(1); diff --git a/app/openssl/crypto/bn/bn_nist.c b/app/openssl/crypto/bn/bn_nist.c index c6de0326..e22968d4 100644 --- a/app/openssl/crypto/bn/bn_nist.c +++ b/app/openssl/crypto/bn/bn_nist.c @@ -286,26 +286,25 @@ const BIGNUM *BN_get0_nist_prime_521(void) } -static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max) +static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max) { int i; - BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); #ifdef BN_DEBUG OPENSSL_assert(top <= max); #endif - for (i = (top); i != 0; i--) - *_tmp1++ = *_tmp2++; - for (i = (max) - (top); i != 0; i--) - *_tmp1++ = (BN_ULONG) 0; + for (i = 0; i < top; i++) + dst[i] = src[i]; + for (; i < max; i++) + dst[i] = 0; } -static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) +static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top) { int i; - BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); - for (i = (top); i != 0; i--) - *_tmp1++ = *_tmp2++; + + for (i = 0; i < top; i++) + dst[i] = src[i]; } #if BN_BITS2 == 64 @@ -319,6 +318,13 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l))) #define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0)); #define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n) +# if defined(L_ENDIAN) +# if defined(__arch64__) +# define NIST_INT64 long +# else +# define NIST_INT64 long long +# endif +# endif #else #define bn_cp_64(to, n, from, m) \ { \ @@ -330,13 +336,15 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) bn_32_set_0(to, (n)*2); \ bn_32_set_0(to, (n)*2+1); \ } -#if BN_BITS2 == 32 #define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0; #define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0; -#endif +# if defined(_WIN32) && !defined(__GNUC__) +# define NIST_INT64 __int64 +# elif defined(BN_LLONG) +# define NIST_INT64 long long +# endif #endif /* BN_BITS2 != 64 */ - #define nist_set_192(to, from, a1, a2, a3) \ { \ bn_cp_64(to, 0, from, (a3) - 3) \ @@ -350,9 +358,11 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, int top = a->top, i; int carry; register BN_ULONG *r_d, *a_d = a->d; - BN_ULONG t_d[BN_NIST_192_TOP], - buf[BN_NIST_192_TOP], - c_d[BN_NIST_192_TOP], + union { + BN_ULONG bn[BN_NIST_192_TOP]; + unsigned int ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)]; + } buf; + BN_ULONG c_d[BN_NIST_192_TOP], *res; PTR_SIZE_INT mask; static const BIGNUM _bignum_nist_p_192_sqr = { @@ -385,15 +395,48 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, else r_d = a_d; - nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP); + nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP); + +#if defined(NIST_INT64) + { + NIST_INT64 acc; /* accumulator */ + unsigned int *rp=(unsigned int *)r_d; + const unsigned int *bp=(const unsigned int *)buf.ui; + + acc = rp[0]; acc += bp[3*2-6]; + acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32; + + acc += rp[1]; acc += bp[3*2-5]; + acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32; + + acc += rp[2]; acc += bp[3*2-6]; + acc += bp[4*2-6]; + acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32; + + acc += rp[3]; acc += bp[3*2-5]; + acc += bp[4*2-5]; + acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32; + + acc += rp[4]; acc += bp[4*2-6]; + acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32; + + acc += rp[5]; acc += bp[4*2-5]; + acc += bp[5*2-5]; rp[5] = (unsigned int)acc; + + carry = (int)(acc>>32); + } +#else + { + BN_ULONG t_d[BN_NIST_192_TOP]; - nist_set_192(t_d, buf, 0, 3, 3); + nist_set_192(t_d, buf.bn, 0, 3, 3); carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); - nist_set_192(t_d, buf, 4, 4, 0); + nist_set_192(t_d, buf.bn, 4, 4, 0); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); - nist_set_192(t_d, buf, 5, 5, 5) + nist_set_192(t_d, buf.bn, 5, 5, 5) carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); - + } +#endif if (carry > 0) carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP); else @@ -407,8 +450,9 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, */ mask = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP); mask &= 0-(PTR_SIZE_INT)carry; + res = c_d; res = (BN_ULONG *) - (((PTR_SIZE_INT)c_d&~mask) | ((PTR_SIZE_INT)r_d&mask)); + (((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_192_TOP); r->top = BN_NIST_192_TOP; bn_correct_top(r); @@ -435,9 +479,11 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, int top = a->top, i; int carry; BN_ULONG *r_d, *a_d = a->d; - BN_ULONG t_d[BN_NIST_224_TOP], - buf[BN_NIST_224_TOP], - c_d[BN_NIST_224_TOP], + union { + BN_ULONG bn[BN_NIST_224_TOP]; + unsigned int ui[BN_NIST_224_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)]; + } buf; + BN_ULONG c_d[BN_NIST_224_TOP], *res; PTR_SIZE_INT mask; union { bn_addsub_f f; PTR_SIZE_INT p; } u; @@ -474,25 +520,67 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, #if BN_BITS2==64 /* copy upper 256 bits of 448 bit number ... */ - nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP); + nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP); /* ... and right shift by 32 to obtain upper 224 bits */ - nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8); + nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8); /* truncate lower part to 224 bits too */ r_d[BN_NIST_224_TOP-1] &= BN_MASK2l; #else - nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); + nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); #endif - nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0); + +#if defined(NIST_INT64) && BN_BITS2!=64 + { + NIST_INT64 acc; /* accumulator */ + unsigned int *rp=(unsigned int *)r_d; + const unsigned int *bp=(const unsigned int *)buf.ui; + + acc = rp[0]; acc -= bp[7-7]; + acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32; + + acc += rp[1]; acc -= bp[8-7]; + acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32; + + acc += rp[2]; acc -= bp[9-7]; + acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32; + + acc += rp[3]; acc += bp[7-7]; + acc += bp[11-7]; + acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32; + + acc += rp[4]; acc += bp[8-7]; + acc += bp[12-7]; + acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32; + + acc += rp[5]; acc += bp[9-7]; + acc += bp[13-7]; + acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32; + + acc += rp[6]; acc += bp[10-7]; + acc -= bp[13-7]; rp[6] = (unsigned int)acc; + + carry = (int)(acc>>32); +# if BN_BITS2==64 + rp[7] = carry; +# endif + } +#else + { + BN_ULONG t_d[BN_NIST_224_TOP]; + + nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0); carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0); + nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7); + nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11); + nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP); #if BN_BITS2==64 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32); +#endif + } #endif u.f = bn_sub_words; if (carry > 0) @@ -521,7 +609,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, /* otherwise it's effectively same as in BN_nist_mod_192... */ mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_224_TOP); r->top = BN_NIST_224_TOP; @@ -548,9 +637,11 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, int i, top = a->top; int carry = 0; register BN_ULONG *a_d = a->d, *r_d; - BN_ULONG t_d[BN_NIST_256_TOP], - buf[BN_NIST_256_TOP], - c_d[BN_NIST_256_TOP], + union { + BN_ULONG bn[BN_NIST_256_TOP]; + unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)]; + } buf; + BN_ULONG c_d[BN_NIST_256_TOP], *res; PTR_SIZE_INT mask; union { bn_addsub_f f; PTR_SIZE_INT p; } u; @@ -584,12 +675,87 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, else r_d = a_d; - nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP); + nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP); + +#if defined(NIST_INT64) + { + NIST_INT64 acc; /* accumulator */ + unsigned int *rp=(unsigned int *)r_d; + const unsigned int *bp=(const unsigned int *)buf.ui; + + acc = rp[0]; acc += bp[8-8]; + acc += bp[9-8]; + acc -= bp[11-8]; + acc -= bp[12-8]; + acc -= bp[13-8]; + acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32; + + acc += rp[1]; acc += bp[9-8]; + acc += bp[10-8]; + acc -= bp[12-8]; + acc -= bp[13-8]; + acc -= bp[14-8]; + acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32; + + acc += rp[2]; acc += bp[10-8]; + acc += bp[11-8]; + acc -= bp[13-8]; + acc -= bp[14-8]; + acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32; + + acc += rp[3]; acc += bp[11-8]; + acc += bp[11-8]; + acc += bp[12-8]; + acc += bp[12-8]; + acc += bp[13-8]; + acc -= bp[15-8]; + acc -= bp[8-8]; + acc -= bp[9-8]; rp[3] = (unsigned int)acc; acc >>= 32; + + acc += rp[4]; acc += bp[12-8]; + acc += bp[12-8]; + acc += bp[13-8]; + acc += bp[13-8]; + acc += bp[14-8]; + acc -= bp[9-8]; + acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32; + + acc += rp[5]; acc += bp[13-8]; + acc += bp[13-8]; + acc += bp[14-8]; + acc += bp[14-8]; + acc += bp[15-8]; + acc -= bp[10-8]; + acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32; + + acc += rp[6]; acc += bp[14-8]; + acc += bp[14-8]; + acc += bp[15-8]; + acc += bp[15-8]; + acc += bp[14-8]; + acc += bp[13-8]; + acc -= bp[8-8]; + acc -= bp[9-8]; rp[6] = (unsigned int)acc; acc >>= 32; + + acc += rp[7]; acc += bp[15-8]; + acc += bp[15-8]; + acc += bp[15-8]; + acc += bp[8 -8]; + acc -= bp[10-8]; + acc -= bp[11-8]; + acc -= bp[12-8]; + acc -= bp[13-8]; rp[7] = (unsigned int)acc; + + carry = (int)(acc>>32); + } +#else + { + BN_ULONG t_d[BN_NIST_256_TOP]; /*S1*/ - nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0); + nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0); /*S2*/ - nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0); + nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0); carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP); /* left shift */ { @@ -607,24 +773,26 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, } carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*S3*/ - nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8); + nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*S4*/ - nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9); + nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*D1*/ - nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11); + nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*D2*/ - nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12); + nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*D3*/ - nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13); + nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); /*D4*/ - nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14); + nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); + } +#endif /* see BN_nist_mod_224 for explanation */ u.f = bn_sub_words; if (carry > 0) @@ -641,7 +809,8 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_256_TOP); r->top = BN_NIST_256_TOP; @@ -672,9 +841,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, int i, top = a->top; int carry = 0; register BN_ULONG *r_d, *a_d = a->d; - BN_ULONG t_d[BN_NIST_384_TOP], - buf[BN_NIST_384_TOP], - c_d[BN_NIST_384_TOP], + union { + BN_ULONG bn[BN_NIST_384_TOP]; + unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)]; + } buf; + BN_ULONG c_d[BN_NIST_384_TOP], *res; PTR_SIZE_INT mask; union { bn_addsub_f f; PTR_SIZE_INT p; } u; @@ -709,10 +880,100 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, else r_d = a_d; - nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP); + nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP); + +#if defined(NIST_INT64) + { + NIST_INT64 acc; /* accumulator */ + unsigned int *rp=(unsigned int *)r_d; + const unsigned int *bp=(const unsigned int *)buf.ui; + + acc = rp[0]; acc += bp[12-12]; + acc += bp[21-12]; + acc += bp[20-12]; + acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32; + + acc += rp[1]; acc += bp[13-12]; + acc += bp[22-12]; + acc += bp[23-12]; + acc -= bp[12-12]; + acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32; + + acc += rp[2]; acc += bp[14-12]; + acc += bp[23-12]; + acc -= bp[13-12]; + acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32; + + acc += rp[3]; acc += bp[15-12]; + acc += bp[12-12]; + acc += bp[20-12]; + acc += bp[21-12]; + acc -= bp[14-12]; + acc -= bp[22-12]; + acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32; + + acc += rp[4]; acc += bp[21-12]; + acc += bp[21-12]; + acc += bp[16-12]; + acc += bp[13-12]; + acc += bp[12-12]; + acc += bp[20-12]; + acc += bp[22-12]; + acc -= bp[15-12]; + acc -= bp[23-12]; + acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32; + + acc += rp[5]; acc += bp[22-12]; + acc += bp[22-12]; + acc += bp[17-12]; + acc += bp[14-12]; + acc += bp[13-12]; + acc += bp[21-12]; + acc += bp[23-12]; + acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32; + + acc += rp[6]; acc += bp[23-12]; + acc += bp[23-12]; + acc += bp[18-12]; + acc += bp[15-12]; + acc += bp[14-12]; + acc += bp[22-12]; + acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32; + + acc += rp[7]; acc += bp[19-12]; + acc += bp[16-12]; + acc += bp[15-12]; + acc += bp[23-12]; + acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32; + + acc += rp[8]; acc += bp[20-12]; + acc += bp[17-12]; + acc += bp[16-12]; + acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32; + + acc += rp[9]; acc += bp[21-12]; + acc += bp[18-12]; + acc += bp[17-12]; + acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32; + + acc += rp[10]; acc += bp[22-12]; + acc += bp[19-12]; + acc += bp[18-12]; + acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32; + + acc += rp[11]; acc += bp[23-12]; + acc += bp[20-12]; + acc += bp[19-12]; + acc -= bp[22-12]; rp[11] = (unsigned int)acc; + + carry = (int)(acc>>32); + } +#else + { + BN_ULONG t_d[BN_NIST_384_TOP]; /*S1*/ - nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4); + nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4); /* left shift */ { register BN_ULONG *ap,t,c; @@ -729,29 +990,31 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2), t_d, BN_NIST_256_TOP); /*S2 */ - carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP); + carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP); /*S3*/ - nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21); + nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*S4*/ - nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0); + nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*S5*/ - nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0); + nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*S6*/ - nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20); + nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*D1*/ - nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23); + nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*D2*/ - nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0); + nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); /*D3*/ - nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0); + nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); + } +#endif /* see BN_nist_mod_224 for explanation */ u.f = bn_sub_words; if (carry > 0) @@ -768,7 +1031,8 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_384_TOP); r->top = BN_NIST_384_TOP; @@ -834,7 +1098,8 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP); mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP); - res = (BN_ULONG *)(((PTR_SIZE_INT)t_d&~mask) | + res = t_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d,res,BN_NIST_521_TOP); r->top = BN_NIST_521_TOP; diff --git a/app/openssl/crypto/bn/bn_print.c b/app/openssl/crypto/bn/bn_print.c index bebb466d..1743b6a7 100644 --- a/app/openssl/crypto/bn/bn_print.c +++ b/app/openssl/crypto/bn/bn_print.c @@ -357,3 +357,22 @@ end: return(ret); } #endif + +char *BN_options(void) + { + static int init=0; + static char data[16]; + + if (!init) + { + init++; +#ifdef BN_LLONG + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); +#else + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); +#endif + } + return(data); + } diff --git a/app/openssl/crypto/bn/bn_rand.c b/app/openssl/crypto/bn/bn_rand.c index b376c28f..55676f07 100644 --- a/app/openssl/crypto/bn/bn_rand.c +++ b/app/openssl/crypto/bn/bn_rand.c @@ -114,6 +114,7 @@ #include "cryptlib.h" #include "bn_lcl.h" #include +#include static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) { @@ -303,3 +304,72 @@ int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) { return bn_rand_range(1, r, range); } + +#ifndef OPENSSL_NO_SHA512 +/* BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike + * BN_rand_range, it also includes the contents of |priv| and |message| in the + * generation so that an RNG failure isn't fatal as long as |priv| remains + * secret. This is intended for use in DSA and ECDSA where an RNG weakness + * leads directly to private key exposure unless this function is used. */ +int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM* priv, + const unsigned char *message, size_t message_len, + BN_CTX *ctx) + { + SHA512_CTX sha; + /* We use 512 bits of random data per iteration to + * ensure that we have at least |range| bits of randomness. */ + unsigned char random_bytes[64]; + unsigned char digest[SHA512_DIGEST_LENGTH]; + unsigned done, todo; + /* We generate |range|+8 bytes of random output. */ + const unsigned num_k_bytes = BN_num_bytes(range) + 8; + unsigned char private_bytes[96]; + unsigned char *k_bytes; + int ret = 0; + + k_bytes = OPENSSL_malloc(num_k_bytes); + if (!k_bytes) + goto err; + + /* We copy |priv| into a local buffer to avoid exposing its length. */ + todo = sizeof(priv->d[0])*priv->top; + if (todo > sizeof(private_bytes)) + { + /* No reasonable DSA or ECDSA key should have a private key + * this large and we don't handle this case in order to avoid + * leaking the length of the private key. */ + BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE); + goto err; + } + memcpy(private_bytes, priv->d, todo); + memset(private_bytes + todo, 0, sizeof(private_bytes) - todo); + + for (done = 0; done < num_k_bytes;) { + if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1) + goto err; + SHA512_Init(&sha); + SHA512_Update(&sha, &done, sizeof(done)); + SHA512_Update(&sha, private_bytes, sizeof(private_bytes)); + SHA512_Update(&sha, message, message_len); + SHA512_Update(&sha, random_bytes, sizeof(random_bytes)); + SHA512_Final(digest, &sha); + + todo = num_k_bytes - done; + if (todo > SHA512_DIGEST_LENGTH) + todo = SHA512_DIGEST_LENGTH; + memcpy(k_bytes + done, digest, todo); + done += todo; + } + + if (!BN_bin2bn(k_bytes, num_k_bytes, out)) + goto err; + if (BN_mod(out, out, range, ctx) != 1) + goto err; + ret = 1; + +err: + if (k_bytes) + OPENSSL_free(k_bytes); + return ret; + } +#endif /* OPENSSL_NO_SHA512 */ diff --git a/app/openssl/crypto/bn/bn_shift.c b/app/openssl/crypto/bn/bn_shift.c index c4d301af..a6fca2c4 100644 --- a/app/openssl/crypto/bn/bn_shift.c +++ b/app/openssl/crypto/bn/bn_shift.c @@ -99,7 +99,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) int BN_rshift1(BIGNUM *r, const BIGNUM *a) { BN_ULONG *ap,*rp,t,c; - int i; + int i,j; bn_check_top(r); bn_check_top(a); @@ -109,22 +109,25 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) BN_zero(r); return(1); } + i = a->top; + ap= a->d; + j = i-(ap[i-1]==1); if (a != r) { - if (bn_wexpand(r,a->top) == NULL) return(0); - r->top=a->top; + if (bn_wexpand(r,j) == NULL) return(0); r->neg=a->neg; } - ap=a->d; rp=r->d; - c=0; - for (i=a->top-1; i>=0; i--) + t=ap[--i]; + c=(t&1)?BN_TBIT:0; + if (t>>=1) rp[i]=t; + while (i>0) { - t=ap[i]; + t=ap[--i]; rp[i]=((t>>1)&BN_MASK2)|c; c=(t&1)?BN_TBIT:0; } - bn_correct_top(r); + r->top=j; bn_check_top(r); return(1); } @@ -182,10 +185,11 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) BN_zero(r); return(1); } + i = (BN_num_bits(a)-n+(BN_BITS2-1))/BN_BITS2; if (r != a) { r->neg=a->neg; - if (bn_wexpand(r,a->top-nw+1) == NULL) return(0); + if (bn_wexpand(r,i) == NULL) return(0); } else { @@ -196,7 +200,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) f= &(a->d[nw]); t=r->d; j=a->top-nw; - r->top=j; + r->top=i; if (rb == 0) { @@ -212,9 +216,8 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) l= *(f++); *(t++) =(tmp|(l<>rb)&BN_MASK2; + if ((l = (l>>rb)&BN_MASK2)) *(t) = l; } - bn_correct_top(r); bn_check_top(r); return(1); } diff --git a/app/openssl/crypto/bn/bn_word.c b/app/openssl/crypto/bn/bn_word.c index ee7b87c4..de83a15b 100644 --- a/app/openssl/crypto/bn/bn_word.c +++ b/app/openssl/crypto/bn/bn_word.c @@ -144,26 +144,17 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) a->neg=!(a->neg); return(i); } - /* Only expand (and risk failing) if it's possibly necessary */ - if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) && - (bn_wexpand(a,a->top+1) == NULL)) - return(0); - i=0; - for (;;) + for (i=0;w!=0 && itop;i++) { - if (i >= a->top) - l=w; - else - l=(a->d[i]+w)&BN_MASK2; - a->d[i]=l; - if (w > l) - w=1; - else - break; - i++; + a->d[i] = l = (a->d[i]+w)&BN_MASK2; + w = (w>l)?1:0; } - if (i >= a->top) + if (w && i==a->top) + { + if (bn_wexpand(a,a->top+1) == NULL) return 0; a->top++; + a->d[i]=w; + } bn_check_top(a); return(1); } diff --git a/app/openssl/crypto/bn/bntest.c b/app/openssl/crypto/bn/bntest.c index 0cd99c5b..06f5954a 100644 --- a/app/openssl/crypto/bn/bntest.c +++ b/app/openssl/crypto/bn/bntest.c @@ -262,7 +262,7 @@ int main(int argc, char *argv[]) message(out,"BN_mod_sqrt"); if (!test_sqrt(out,ctx)) goto err; (void)BIO_flush(out); - +#ifndef OPENSSL_NO_EC2M message(out,"BN_GF2m_add"); if (!test_gf2m_add(out)) goto err; (void)BIO_flush(out); @@ -298,7 +298,7 @@ int main(int argc, char *argv[]) message(out,"BN_GF2m_mod_solve_quad"); if (!test_gf2m_mod_solve_quad(out,ctx)) goto err; (void)BIO_flush(out); - +#endif BN_CTX_free(ctx); BIO_free(out); @@ -1061,7 +1061,7 @@ int test_exp(BIO *bp, BN_CTX *ctx) BN_free(one); return(1); } - +#ifndef OPENSSL_NO_EC2M int test_gf2m_add(BIO *bp) { BIGNUM a,b,c; @@ -1636,7 +1636,7 @@ int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx) BN_free(e); return ret; } - +#endif static int genprime_cb(int p, int n, BN_GENCB *arg) { char c='*'; diff --git a/app/openssl/crypto/buffer/buf_str.c b/app/openssl/crypto/buffer/buf_str.c new file mode 100644 index 00000000..151f5ea9 --- /dev/null +++ b/app/openssl/crypto/buffer/buf_str.c @@ -0,0 +1,119 @@ +/* crypto/buffer/buffer.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include "cryptlib.h" +#include + +char *BUF_strdup(const char *str) + { + if (str == NULL) return(NULL); + return BUF_strndup(str, strlen(str)); + } + +char *BUF_strndup(const char *str, size_t siz) + { + char *ret; + + if (str == NULL) return(NULL); + + ret=OPENSSL_malloc(siz+1); + if (ret == NULL) + { + BUFerr(BUF_F_BUF_STRNDUP,ERR_R_MALLOC_FAILURE); + return(NULL); + } + BUF_strlcpy(ret,str,siz+1); + return(ret); + } + +void *BUF_memdup(const void *data, size_t siz) + { + void *ret; + + if (data == NULL) return(NULL); + + ret=OPENSSL_malloc(siz); + if (ret == NULL) + { + BUFerr(BUF_F_BUF_MEMDUP,ERR_R_MALLOC_FAILURE); + return(NULL); + } + return memcpy(ret, data, siz); + } + +size_t BUF_strlcpy(char *dst, const char *src, size_t size) + { + size_t l = 0; + for(; size > 1 && *src; size--) + { + *dst++ = *src++; + l++; + } + if (size) + *dst = '\0'; + return l + strlen(src); + } + +size_t BUF_strlcat(char *dst, const char *src, size_t size) + { + size_t l = 0; + for(; size > 0 && *dst; size--, dst++) + l++; + return l + BUF_strlcpy(dst, src, size); + } diff --git a/app/openssl/crypto/buffer/buffer.c b/app/openssl/crypto/buffer/buffer.c index 620ea8d5..d4a4ce43 100644 --- a/app/openssl/crypto/buffer/buffer.c +++ b/app/openssl/crypto/buffer/buffer.c @@ -60,6 +60,11 @@ #include "cryptlib.h" #include +/* LIMIT_BEFORE_EXPANSION is the maximum n such that (n+3)/3*4 < 2**31. That + * function is applied in several functions in this file and this limit ensures + * that the result fits in an int. */ +#define LIMIT_BEFORE_EXPANSION 0x5ffffffc + BUF_MEM *BUF_MEM_new(void) { BUF_MEM *ret; @@ -105,6 +110,12 @@ int BUF_MEM_grow(BUF_MEM *str, size_t len) str->length=len; return(len); } + /* This limit is sufficient to ensure (len+3)/3*4 < 2**31 */ + if (len > LIMIT_BEFORE_EXPANSION) + { + BUFerr(BUF_F_BUF_MEM_GROW,ERR_R_MALLOC_FAILURE); + return 0; + } n=(len+3)/3*4; if (str->data == NULL) ret=OPENSSL_malloc(n); @@ -142,6 +153,12 @@ int BUF_MEM_grow_clean(BUF_MEM *str, size_t len) str->length=len; return(len); } + /* This limit is sufficient to ensure (len+3)/3*4 < 2**31 */ + if (len > LIMIT_BEFORE_EXPANSION) + { + BUFerr(BUF_F_BUF_MEM_GROW_CLEAN,ERR_R_MALLOC_FAILURE); + return 0; + } n=(len+3)/3*4; if (str->data == NULL) ret=OPENSSL_malloc(n); @@ -162,72 +179,14 @@ int BUF_MEM_grow_clean(BUF_MEM *str, size_t len) return(len); } -char *BUF_strdup(const char *str) - { - if (str == NULL) return(NULL); - return BUF_strndup(str, strlen(str)); - } - -char *BUF_strndup(const char *str, size_t siz) - { - char *ret; - - if (str == NULL) return(NULL); - - ret=OPENSSL_malloc(siz+1); - if (ret == NULL) - { - BUFerr(BUF_F_BUF_STRNDUP,ERR_R_MALLOC_FAILURE); - return(NULL); - } - BUF_strlcpy(ret,str,siz+1); - return(ret); - } - -void *BUF_memdup(const void *data, size_t siz) - { - void *ret; - - if (data == NULL) return(NULL); - - ret=OPENSSL_malloc(siz); - if (ret == NULL) - { - BUFerr(BUF_F_BUF_MEMDUP,ERR_R_MALLOC_FAILURE); - return(NULL); - } - return memcpy(ret, data, siz); - } - -size_t BUF_strlcpy(char *dst, const char *src, size_t size) - { - size_t l = 0; - for(; size > 1 && *src; size--) - { - *dst++ = *src++; - l++; - } - if (size) - *dst = '\0'; - return l + strlen(src); - } - -size_t BUF_strlcat(char *dst, const char *src, size_t size) - { - size_t l = 0; - for(; size > 0 && *dst; size--, dst++) - l++; - return l + BUF_strlcpy(dst, src, size); - } - -void BUF_reverse(unsigned char *out, unsigned char *in, size_t size) +void BUF_reverse(unsigned char *out, const unsigned char *in, size_t size) { size_t i; if (in) { out += size - 1; for (i = 0; i < size; i++) - *in++ = *out--; + *out-- = *in++; } else { diff --git a/app/openssl/crypto/buffer/buffer.h b/app/openssl/crypto/buffer/buffer.h index 178e4182..f8da32b4 100644 --- a/app/openssl/crypto/buffer/buffer.h +++ b/app/openssl/crypto/buffer/buffer.h @@ -88,7 +88,7 @@ int BUF_MEM_grow_clean(BUF_MEM *str, size_t len); char * BUF_strdup(const char *str); char * BUF_strndup(const char *str, size_t siz); void * BUF_memdup(const void *data, size_t siz); -void BUF_reverse(unsigned char *out, unsigned char *in, size_t siz); +void BUF_reverse(unsigned char *out, const unsigned char *in, size_t siz); /* safe string functions */ size_t BUF_strlcpy(char *dst,const char *src,size_t siz); diff --git a/app/openssl/crypto/cmac/cm_ameth.c b/app/openssl/crypto/cmac/cm_ameth.c new file mode 100644 index 00000000..0b8e5670 --- /dev/null +++ b/app/openssl/crypto/cmac/cm_ameth.c @@ -0,0 +1,97 @@ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project 2010. + */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include "cryptlib.h" +#include +#include +#include "asn1_locl.h" + +/* CMAC "ASN1" method. This is just here to indicate the + * maximum CMAC output length and to free up a CMAC + * key. + */ + +static int cmac_size(const EVP_PKEY *pkey) + { + return EVP_MAX_BLOCK_LENGTH; + } + +static void cmac_key_free(EVP_PKEY *pkey) + { + CMAC_CTX *cmctx = (CMAC_CTX *)pkey->pkey.ptr; + if (cmctx) + CMAC_CTX_free(cmctx); + } + +const EVP_PKEY_ASN1_METHOD cmac_asn1_meth = + { + EVP_PKEY_CMAC, + EVP_PKEY_CMAC, + 0, + + "CMAC", + "OpenSSL CMAC method", + + 0,0,0,0, + + 0,0,0, + + cmac_size, + 0, + 0,0,0,0,0,0,0, + + cmac_key_free, + 0, + 0,0 + }; + diff --git a/app/openssl/crypto/cmac/cm_pmeth.c b/app/openssl/crypto/cmac/cm_pmeth.c new file mode 100644 index 00000000..072228ec --- /dev/null +++ b/app/openssl/crypto/cmac/cm_pmeth.c @@ -0,0 +1,224 @@ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project 2010. + */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include "cryptlib.h" +#include +#include +#include +#include +#include "evp_locl.h" + +/* The context structure and "key" is simply a CMAC_CTX */ + +static int pkey_cmac_init(EVP_PKEY_CTX *ctx) + { + ctx->data = CMAC_CTX_new(); + if (!ctx->data) + return 0; + ctx->keygen_info_count = 0; + return 1; + } + +static int pkey_cmac_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) + { + if (!pkey_cmac_init(dst)) + return 0; + if (!CMAC_CTX_copy(dst->data, src->data)) + return 0; + return 1; + } + +static void pkey_cmac_cleanup(EVP_PKEY_CTX *ctx) + { + CMAC_CTX_free(ctx->data); + } + +static int pkey_cmac_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) + { + CMAC_CTX *cmkey = CMAC_CTX_new(); + CMAC_CTX *cmctx = ctx->data; + if (!cmkey) + return 0; + if (!CMAC_CTX_copy(cmkey, cmctx)) + { + CMAC_CTX_free(cmkey); + return 0; + } + EVP_PKEY_assign(pkey, EVP_PKEY_CMAC, cmkey); + + return 1; + } + +static int int_update(EVP_MD_CTX *ctx,const void *data,size_t count) + { + if (!CMAC_Update(ctx->pctx->data, data, count)) + return 0; + return 1; + } + +static int cmac_signctx_init(EVP_PKEY_CTX *ctx, EVP_MD_CTX *mctx) + { + EVP_MD_CTX_set_flags(mctx, EVP_MD_CTX_FLAG_NO_INIT); + mctx->update = int_update; + return 1; + } + +static int cmac_signctx(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, + EVP_MD_CTX *mctx) + { + return CMAC_Final(ctx->data, sig, siglen); + } + +static int pkey_cmac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) + { + CMAC_CTX *cmctx = ctx->data; + switch (type) + { + + case EVP_PKEY_CTRL_SET_MAC_KEY: + if (!p2 || p1 < 0) + return 0; + if (!CMAC_Init(cmctx, p2, p1, NULL, NULL)) + return 0; + break; + + case EVP_PKEY_CTRL_CIPHER: + if (!CMAC_Init(cmctx, NULL, 0, p2, ctx->engine)) + return 0; + break; + + case EVP_PKEY_CTRL_MD: + if (ctx->pkey && !CMAC_CTX_copy(ctx->data, + (CMAC_CTX *)ctx->pkey->pkey.ptr)) + return 0; + if (!CMAC_Init(cmctx, NULL, 0, NULL, NULL)) + return 0; + break; + + default: + return -2; + + } + return 1; + } + +static int pkey_cmac_ctrl_str(EVP_PKEY_CTX *ctx, + const char *type, const char *value) + { + if (!value) + { + return 0; + } + if (!strcmp(type, "key")) + { + void *p = (void *)value; + return pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY, + strlen(p), p); + } + if (!strcmp(type, "cipher")) + { + const EVP_CIPHER *c; + c = EVP_get_cipherbyname(value); + if (!c) + return 0; + return pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_CIPHER, -1, (void *)c); + } + if (!strcmp(type, "hexkey")) + { + unsigned char *key; + int r; + long keylen; + key = string_to_hex(value, &keylen); + if (!key) + return 0; + r = pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY, keylen, key); + OPENSSL_free(key); + return r; + } + return -2; + } + +const EVP_PKEY_METHOD cmac_pkey_meth = + { + EVP_PKEY_CMAC, + EVP_PKEY_FLAG_SIGCTX_CUSTOM, + pkey_cmac_init, + pkey_cmac_copy, + pkey_cmac_cleanup, + + 0, 0, + + 0, + pkey_cmac_keygen, + + 0, 0, + + 0, 0, + + 0,0, + + cmac_signctx_init, + cmac_signctx, + + 0,0, + + 0,0, + + 0,0, + + 0,0, + + pkey_cmac_ctrl, + pkey_cmac_ctrl_str + + }; diff --git a/app/openssl/crypto/cmac/cmac.c b/app/openssl/crypto/cmac/cmac.c new file mode 100644 index 00000000..8b72b096 --- /dev/null +++ b/app/openssl/crypto/cmac/cmac.c @@ -0,0 +1,308 @@ +/* crypto/cmac/cmac.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include "cryptlib.h" +#include + +#ifdef OPENSSL_FIPS +#include +#endif + +struct CMAC_CTX_st + { + /* Cipher context to use */ + EVP_CIPHER_CTX cctx; + /* Keys k1 and k2 */ + unsigned char k1[EVP_MAX_BLOCK_LENGTH]; + unsigned char k2[EVP_MAX_BLOCK_LENGTH]; + /* Temporary block */ + unsigned char tbl[EVP_MAX_BLOCK_LENGTH]; + /* Last (possibly partial) block */ + unsigned char last_block[EVP_MAX_BLOCK_LENGTH]; + /* Number of bytes in last block: -1 means context not initialised */ + int nlast_block; + }; + + +/* Make temporary keys K1 and K2 */ + +static void make_kn(unsigned char *k1, unsigned char *l, int bl) + { + int i; + /* Shift block to left, including carry */ + for (i = 0; i < bl; i++) + { + k1[i] = l[i] << 1; + if (i < bl - 1 && l[i + 1] & 0x80) + k1[i] |= 1; + } + /* If MSB set fixup with R */ + if (l[0] & 0x80) + k1[bl - 1] ^= bl == 16 ? 0x87 : 0x1b; + } + +CMAC_CTX *CMAC_CTX_new(void) + { + CMAC_CTX *ctx; + ctx = OPENSSL_malloc(sizeof(CMAC_CTX)); + if (!ctx) + return NULL; + EVP_CIPHER_CTX_init(&ctx->cctx); + ctx->nlast_block = -1; + return ctx; + } + +void CMAC_CTX_cleanup(CMAC_CTX *ctx) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->cctx.engine) + { + FIPS_cmac_ctx_cleanup(ctx); + return; + } +#endif + EVP_CIPHER_CTX_cleanup(&ctx->cctx); + OPENSSL_cleanse(ctx->tbl, EVP_MAX_BLOCK_LENGTH); + OPENSSL_cleanse(ctx->k1, EVP_MAX_BLOCK_LENGTH); + OPENSSL_cleanse(ctx->k2, EVP_MAX_BLOCK_LENGTH); + OPENSSL_cleanse(ctx->last_block, EVP_MAX_BLOCK_LENGTH); + ctx->nlast_block = -1; + } + +EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx) + { + return &ctx->cctx; + } + +void CMAC_CTX_free(CMAC_CTX *ctx) + { + CMAC_CTX_cleanup(ctx); + OPENSSL_free(ctx); + } + +int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in) + { + int bl; + if (in->nlast_block == -1) + return 0; + if (!EVP_CIPHER_CTX_copy(&out->cctx, &in->cctx)) + return 0; + bl = EVP_CIPHER_CTX_block_size(&in->cctx); + memcpy(out->k1, in->k1, bl); + memcpy(out->k2, in->k2, bl); + memcpy(out->tbl, in->tbl, bl); + memcpy(out->last_block, in->last_block, bl); + out->nlast_block = in->nlast_block; + return 1; + } + +int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, + const EVP_CIPHER *cipher, ENGINE *impl) + { + static unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH]; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + { + /* If we have an ENGINE need to allow non FIPS */ + if ((impl || ctx->cctx.engine) + && !(ctx->cctx.flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW)) + + { + EVPerr(EVP_F_CMAC_INIT, EVP_R_DISABLED_FOR_FIPS); + return 0; + } + /* Other algorithm blocking will be done in FIPS_cmac_init, + * via FIPS_cipherinit(). + */ + if (!impl && !ctx->cctx.engine) + return FIPS_cmac_init(ctx, key, keylen, cipher, NULL); + } +#endif + /* All zeros means restart */ + if (!key && !cipher && !impl && keylen == 0) + { + /* Not initialised */ + if (ctx->nlast_block == -1) + return 0; + if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, zero_iv)) + return 0; + memset(ctx->tbl, 0, EVP_CIPHER_CTX_block_size(&ctx->cctx)); + ctx->nlast_block = 0; + return 1; + } + /* Initialiase context */ + if (cipher && !EVP_EncryptInit_ex(&ctx->cctx, cipher, impl, NULL, NULL)) + return 0; + /* Non-NULL key means initialisation complete */ + if (key) + { + int bl; + if (!EVP_CIPHER_CTX_cipher(&ctx->cctx)) + return 0; + if (!EVP_CIPHER_CTX_set_key_length(&ctx->cctx, keylen)) + return 0; + if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, key, zero_iv)) + return 0; + bl = EVP_CIPHER_CTX_block_size(&ctx->cctx); + if (!EVP_Cipher(&ctx->cctx, ctx->tbl, zero_iv, bl)) + return 0; + make_kn(ctx->k1, ctx->tbl, bl); + make_kn(ctx->k2, ctx->k1, bl); + OPENSSL_cleanse(ctx->tbl, bl); + /* Reset context again ready for first data block */ + if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, zero_iv)) + return 0; + /* Zero tbl so resume works */ + memset(ctx->tbl, 0, bl); + ctx->nlast_block = 0; + } + return 1; + } + +int CMAC_Update(CMAC_CTX *ctx, const void *in, size_t dlen) + { + const unsigned char *data = in; + size_t bl; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->cctx.engine) + return FIPS_cmac_update(ctx, in, dlen); +#endif + if (ctx->nlast_block == -1) + return 0; + if (dlen == 0) + return 1; + bl = EVP_CIPHER_CTX_block_size(&ctx->cctx); + /* Copy into partial block if we need to */ + if (ctx->nlast_block > 0) + { + size_t nleft; + nleft = bl - ctx->nlast_block; + if (dlen < nleft) + nleft = dlen; + memcpy(ctx->last_block + ctx->nlast_block, data, nleft); + dlen -= nleft; + ctx->nlast_block += nleft; + /* If no more to process return */ + if (dlen == 0) + return 1; + data += nleft; + /* Else not final block so encrypt it */ + if (!EVP_Cipher(&ctx->cctx, ctx->tbl, ctx->last_block,bl)) + return 0; + } + /* Encrypt all but one of the complete blocks left */ + while(dlen > bl) + { + if (!EVP_Cipher(&ctx->cctx, ctx->tbl, data, bl)) + return 0; + dlen -= bl; + data += bl; + } + /* Copy any data left to last block buffer */ + memcpy(ctx->last_block, data, dlen); + ctx->nlast_block = dlen; + return 1; + + } + +int CMAC_Final(CMAC_CTX *ctx, unsigned char *out, size_t *poutlen) + { + int i, bl, lb; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->cctx.engine) + return FIPS_cmac_final(ctx, out, poutlen); +#endif + if (ctx->nlast_block == -1) + return 0; + bl = EVP_CIPHER_CTX_block_size(&ctx->cctx); + *poutlen = (size_t)bl; + if (!out) + return 1; + lb = ctx->nlast_block; + /* Is last block complete? */ + if (lb == bl) + { + for (i = 0; i < bl; i++) + out[i] = ctx->last_block[i] ^ ctx->k1[i]; + } + else + { + ctx->last_block[lb] = 0x80; + if (bl - lb > 1) + memset(ctx->last_block + lb + 1, 0, bl - lb - 1); + for (i = 0; i < bl; i++) + out[i] = ctx->last_block[i] ^ ctx->k2[i]; + } + if (!EVP_Cipher(&ctx->cctx, out, out, bl)) + { + OPENSSL_cleanse(out, bl); + return 0; + } + return 1; + } + +int CMAC_resume(CMAC_CTX *ctx) + { + if (ctx->nlast_block == -1) + return 0; + /* The buffer "tbl" containes the last fully encrypted block + * which is the last IV (or all zeroes if no last encrypted block). + * The last block has not been modified since CMAC_final(). + * So reinitliasing using the last decrypted block will allow + * CMAC to continue after calling CMAC_Final(). + */ + return EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, ctx->tbl); + } diff --git a/app/openssl/crypto/cmac/cmac.h b/app/openssl/crypto/cmac/cmac.h new file mode 100644 index 00000000..712e92dc --- /dev/null +++ b/app/openssl/crypto/cmac/cmac.h @@ -0,0 +1,82 @@ +/* crypto/cmac/cmac.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + + +#ifndef HEADER_CMAC_H +#define HEADER_CMAC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Opaque */ +typedef struct CMAC_CTX_st CMAC_CTX; + +CMAC_CTX *CMAC_CTX_new(void); +void CMAC_CTX_cleanup(CMAC_CTX *ctx); +void CMAC_CTX_free(CMAC_CTX *ctx); +EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx); +int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in); + +int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, + const EVP_CIPHER *cipher, ENGINE *impl); +int CMAC_Update(CMAC_CTX *ctx, const void *data, size_t dlen); +int CMAC_Final(CMAC_CTX *ctx, unsigned char *out, size_t *poutlen); +int CMAC_resume(CMAC_CTX *ctx); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/app/openssl/crypto/cms/cms.h b/app/openssl/crypto/cms/cms.h new file mode 100644 index 00000000..36994fa6 --- /dev/null +++ b/app/openssl/crypto/cms/cms.h @@ -0,0 +1,501 @@ +/* crypto/cms/cms.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + + +#ifndef HEADER_CMS_H +#define HEADER_CMS_H + +#include + +#ifdef OPENSSL_NO_CMS +#error CMS is disabled. +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct CMS_ContentInfo_st CMS_ContentInfo; +typedef struct CMS_SignerInfo_st CMS_SignerInfo; +typedef struct CMS_CertificateChoices CMS_CertificateChoices; +typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice; +typedef struct CMS_RecipientInfo_st CMS_RecipientInfo; +typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest; +typedef struct CMS_Receipt_st CMS_Receipt; + +DECLARE_STACK_OF(CMS_SignerInfo) +DECLARE_STACK_OF(GENERAL_NAMES) +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) +DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +#define CMS_SIGNERINFO_ISSUER_SERIAL 0 +#define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +#define CMS_RECIPINFO_TRANS 0 +#define CMS_RECIPINFO_AGREE 1 +#define CMS_RECIPINFO_KEK 2 +#define CMS_RECIPINFO_PASS 3 +#define CMS_RECIPINFO_OTHER 4 + +/* S/MIME related flags */ + +#define CMS_TEXT 0x1 +#define CMS_NOCERTS 0x2 +#define CMS_NO_CONTENT_VERIFY 0x4 +#define CMS_NO_ATTR_VERIFY 0x8 +#define CMS_NOSIGS \ + (CMS_NO_CONTENT_VERIFY|CMS_NO_ATTR_VERIFY) +#define CMS_NOINTERN 0x10 +#define CMS_NO_SIGNER_CERT_VERIFY 0x20 +#define CMS_NOVERIFY 0x20 +#define CMS_DETACHED 0x40 +#define CMS_BINARY 0x80 +#define CMS_NOATTR 0x100 +#define CMS_NOSMIMECAP 0x200 +#define CMS_NOOLDMIMETYPE 0x400 +#define CMS_CRLFEOL 0x800 +#define CMS_STREAM 0x1000 +#define CMS_NOCRL 0x2000 +#define CMS_PARTIAL 0x4000 +#define CMS_REUSE_DIGEST 0x8000 +#define CMS_USE_KEYID 0x10000 +#define CMS_DEBUG_DECRYPT 0x20000 + +const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms); + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont); +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *bio); + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms); +int CMS_is_detached(CMS_ContentInfo *cms); +int CMS_set_detached(CMS_ContentInfo *cms, int detached); + +#ifdef HEADER_PEM_H +DECLARE_PEM_rw_const(CMS, CMS_ContentInfo) +#endif + +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms); +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms); +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms); + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms); +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont); +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags); + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, unsigned int flags); + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, unsigned int flags); + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, + unsigned int flags); + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags); + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags); + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags); + +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, size_t keylen, + unsigned int flags); + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen); + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags); + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags); + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms); + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags); + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pkey, X509 *cert, + BIO *dcont, BIO *out, + unsigned int flags); + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert); +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen); +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen); + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags); +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey); +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert); +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg); +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType); + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype); + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen); + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen); + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, + ossl_ssize_t passlen); + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph); + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags); + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid); +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms); + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo *cms); +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert); +int CMS_add1_cert(CMS_ContentInfo *cms, X509 *cert); +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms); + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms); +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl); +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl); +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms); + +int CMS_SignedData_init(CMS_ContentInfo *cms); +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags); +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + unsigned int flags); +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, X509 **signer, + X509_ALGOR **pdig, X509_ALGOR **psig); +int CMS_SignerInfo_sign(CMS_SignerInfo *si); +int CMS_SignerInfo_verify(CMS_SignerInfo *si); +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs); +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize); +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap); + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si); +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_signed_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si); +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +#ifdef HEADER_X509V3_H + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0(unsigned char *id, int idlen, + int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo); +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr); +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto); + +#endif + +/* BEGIN ERROR CODES */ +/* The following lines are auto generated by the script mkerr.pl. Any changes + * made after this point may be overwritten when the script is next run. + */ +void ERR_load_CMS_strings(void); + +/* Error codes for the CMS functions. */ + +/* Function codes. */ +#define CMS_F_CHECK_CONTENT 99 +#define CMS_F_CMS_ADD0_CERT 164 +#define CMS_F_CMS_ADD0_RECIPIENT_KEY 100 +#define CMS_F_CMS_ADD0_RECIPIENT_PASSWORD 165 +#define CMS_F_CMS_ADD1_RECEIPTREQUEST 158 +#define CMS_F_CMS_ADD1_RECIPIENT_CERT 101 +#define CMS_F_CMS_ADD1_SIGNER 102 +#define CMS_F_CMS_ADD1_SIGNINGTIME 103 +#define CMS_F_CMS_COMPRESS 104 +#define CMS_F_CMS_COMPRESSEDDATA_CREATE 105 +#define CMS_F_CMS_COMPRESSEDDATA_INIT_BIO 106 +#define CMS_F_CMS_COPY_CONTENT 107 +#define CMS_F_CMS_COPY_MESSAGEDIGEST 108 +#define CMS_F_CMS_DATA 109 +#define CMS_F_CMS_DATAFINAL 110 +#define CMS_F_CMS_DATAINIT 111 +#define CMS_F_CMS_DECRYPT 112 +#define CMS_F_CMS_DECRYPT_SET1_KEY 113 +#define CMS_F_CMS_DECRYPT_SET1_PASSWORD 166 +#define CMS_F_CMS_DECRYPT_SET1_PKEY 114 +#define CMS_F_CMS_DIGESTALGORITHM_FIND_CTX 115 +#define CMS_F_CMS_DIGESTALGORITHM_INIT_BIO 116 +#define CMS_F_CMS_DIGESTEDDATA_DO_FINAL 117 +#define CMS_F_CMS_DIGEST_VERIFY 118 +#define CMS_F_CMS_ENCODE_RECEIPT 161 +#define CMS_F_CMS_ENCRYPT 119 +#define CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO 120 +#define CMS_F_CMS_ENCRYPTEDDATA_DECRYPT 121 +#define CMS_F_CMS_ENCRYPTEDDATA_ENCRYPT 122 +#define CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY 123 +#define CMS_F_CMS_ENVELOPEDDATA_CREATE 124 +#define CMS_F_CMS_ENVELOPEDDATA_INIT_BIO 125 +#define CMS_F_CMS_ENVELOPED_DATA_INIT 126 +#define CMS_F_CMS_FINAL 127 +#define CMS_F_CMS_GET0_CERTIFICATE_CHOICES 128 +#define CMS_F_CMS_GET0_CONTENT 129 +#define CMS_F_CMS_GET0_ECONTENT_TYPE 130 +#define CMS_F_CMS_GET0_ENVELOPED 131 +#define CMS_F_CMS_GET0_REVOCATION_CHOICES 132 +#define CMS_F_CMS_GET0_SIGNED 133 +#define CMS_F_CMS_MSGSIGDIGEST_ADD1 162 +#define CMS_F_CMS_RECEIPTREQUEST_CREATE0 159 +#define CMS_F_CMS_RECEIPT_VERIFY 160 +#define CMS_F_CMS_RECIPIENTINFO_DECRYPT 134 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT 135 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT 136 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID 137 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_ID_CMP 138 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_CERT_CMP 139 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT 140 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT 141 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS 142 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID 143 +#define CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT 167 +#define CMS_F_CMS_RECIPIENTINFO_SET0_KEY 144 +#define CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD 168 +#define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY 145 +#define CMS_F_CMS_SET1_SIGNERIDENTIFIER 146 +#define CMS_F_CMS_SET_DETACHED 147 +#define CMS_F_CMS_SIGN 148 +#define CMS_F_CMS_SIGNED_DATA_INIT 149 +#define CMS_F_CMS_SIGNERINFO_CONTENT_SIGN 150 +#define CMS_F_CMS_SIGNERINFO_SIGN 151 +#define CMS_F_CMS_SIGNERINFO_VERIFY 152 +#define CMS_F_CMS_SIGNERINFO_VERIFY_CERT 153 +#define CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT 154 +#define CMS_F_CMS_SIGN_RECEIPT 163 +#define CMS_F_CMS_STREAM 155 +#define CMS_F_CMS_UNCOMPRESS 156 +#define CMS_F_CMS_VERIFY 157 + +/* Reason codes. */ +#define CMS_R_ADD_SIGNER_ERROR 99 +#define CMS_R_CERTIFICATE_ALREADY_PRESENT 175 +#define CMS_R_CERTIFICATE_HAS_NO_KEYID 160 +#define CMS_R_CERTIFICATE_VERIFY_ERROR 100 +#define CMS_R_CIPHER_INITIALISATION_ERROR 101 +#define CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR 102 +#define CMS_R_CMS_DATAFINAL_ERROR 103 +#define CMS_R_CMS_LIB 104 +#define CMS_R_CONTENTIDENTIFIER_MISMATCH 170 +#define CMS_R_CONTENT_NOT_FOUND 105 +#define CMS_R_CONTENT_TYPE_MISMATCH 171 +#define CMS_R_CONTENT_TYPE_NOT_COMPRESSED_DATA 106 +#define CMS_R_CONTENT_TYPE_NOT_ENVELOPED_DATA 107 +#define CMS_R_CONTENT_TYPE_NOT_SIGNED_DATA 108 +#define CMS_R_CONTENT_VERIFY_ERROR 109 +#define CMS_R_CTRL_ERROR 110 +#define CMS_R_CTRL_FAILURE 111 +#define CMS_R_DECRYPT_ERROR 112 +#define CMS_R_DIGEST_ERROR 161 +#define CMS_R_ERROR_GETTING_PUBLIC_KEY 113 +#define CMS_R_ERROR_READING_MESSAGEDIGEST_ATTRIBUTE 114 +#define CMS_R_ERROR_SETTING_KEY 115 +#define CMS_R_ERROR_SETTING_RECIPIENTINFO 116 +#define CMS_R_INVALID_ENCRYPTED_KEY_LENGTH 117 +#define CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER 176 +#define CMS_R_INVALID_KEY_LENGTH 118 +#define CMS_R_MD_BIO_INIT_ERROR 119 +#define CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH 120 +#define CMS_R_MESSAGEDIGEST_WRONG_LENGTH 121 +#define CMS_R_MSGSIGDIGEST_ERROR 172 +#define CMS_R_MSGSIGDIGEST_VERIFICATION_FAILURE 162 +#define CMS_R_MSGSIGDIGEST_WRONG_LENGTH 163 +#define CMS_R_NEED_ONE_SIGNER 164 +#define CMS_R_NOT_A_SIGNED_RECEIPT 165 +#define CMS_R_NOT_ENCRYPTED_DATA 122 +#define CMS_R_NOT_KEK 123 +#define CMS_R_NOT_KEY_TRANSPORT 124 +#define CMS_R_NOT_PWRI 177 +#define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE 125 +#define CMS_R_NO_CIPHER 126 +#define CMS_R_NO_CONTENT 127 +#define CMS_R_NO_CONTENT_TYPE 173 +#define CMS_R_NO_DEFAULT_DIGEST 128 +#define CMS_R_NO_DIGEST_SET 129 +#define CMS_R_NO_KEY 130 +#define CMS_R_NO_KEY_OR_CERT 174 +#define CMS_R_NO_MATCHING_DIGEST 131 +#define CMS_R_NO_MATCHING_RECIPIENT 132 +#define CMS_R_NO_MATCHING_SIGNATURE 166 +#define CMS_R_NO_MSGSIGDIGEST 167 +#define CMS_R_NO_PASSWORD 178 +#define CMS_R_NO_PRIVATE_KEY 133 +#define CMS_R_NO_PUBLIC_KEY 134 +#define CMS_R_NO_RECEIPT_REQUEST 168 +#define CMS_R_NO_SIGNERS 135 +#define CMS_R_PRIVATE_KEY_DOES_NOT_MATCH_CERTIFICATE 136 +#define CMS_R_RECEIPT_DECODE_ERROR 169 +#define CMS_R_RECIPIENT_ERROR 137 +#define CMS_R_SIGNER_CERTIFICATE_NOT_FOUND 138 +#define CMS_R_SIGNFINAL_ERROR 139 +#define CMS_R_SMIME_TEXT_ERROR 140 +#define CMS_R_STORE_INIT_ERROR 141 +#define CMS_R_TYPE_NOT_COMPRESSED_DATA 142 +#define CMS_R_TYPE_NOT_DATA 143 +#define CMS_R_TYPE_NOT_DIGESTED_DATA 144 +#define CMS_R_TYPE_NOT_ENCRYPTED_DATA 145 +#define CMS_R_TYPE_NOT_ENVELOPED_DATA 146 +#define CMS_R_UNABLE_TO_FINALIZE_CONTEXT 147 +#define CMS_R_UNKNOWN_CIPHER 148 +#define CMS_R_UNKNOWN_DIGEST_ALGORIHM 149 +#define CMS_R_UNKNOWN_ID 150 +#define CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM 151 +#define CMS_R_UNSUPPORTED_CONTENT_TYPE 152 +#define CMS_R_UNSUPPORTED_KEK_ALGORITHM 153 +#define CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM 179 +#define CMS_R_UNSUPPORTED_RECIPIENT_TYPE 154 +#define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE 155 +#define CMS_R_UNSUPPORTED_TYPE 156 +#define CMS_R_UNWRAP_ERROR 157 +#define CMS_R_UNWRAP_FAILURE 180 +#define CMS_R_VERIFICATION_FAILURE 158 +#define CMS_R_WRAP_ERROR 159 + +#ifdef __cplusplus +} +#endif +#endif diff --git a/app/openssl/crypto/cms/cms_asn1.c b/app/openssl/crypto/cms/cms_asn1.c new file mode 100644 index 00000000..cfe67fb6 --- /dev/null +++ b/app/openssl/crypto/cms/cms_asn1.c @@ -0,0 +1,389 @@ +/* crypto/cms/cms_asn1.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include "cms.h" +#include "cms_lcl.h" + + +ASN1_SEQUENCE(CMS_IssuerAndSerialNumber) = { + ASN1_SIMPLE(CMS_IssuerAndSerialNumber, issuer, X509_NAME), + ASN1_SIMPLE(CMS_IssuerAndSerialNumber, serialNumber, ASN1_INTEGER) +} ASN1_SEQUENCE_END(CMS_IssuerAndSerialNumber) + +ASN1_SEQUENCE(CMS_OtherCertificateFormat) = { + ASN1_SIMPLE(CMS_OtherCertificateFormat, otherCertFormat, ASN1_OBJECT), + ASN1_OPT(CMS_OtherCertificateFormat, otherCert, ASN1_ANY) +} ASN1_SEQUENCE_END(CMS_OtherCertificateFormat) + +ASN1_CHOICE(CMS_CertificateChoices) = { + ASN1_SIMPLE(CMS_CertificateChoices, d.certificate, X509), + ASN1_IMP(CMS_CertificateChoices, d.extendedCertificate, ASN1_SEQUENCE, 0), + ASN1_IMP(CMS_CertificateChoices, d.v1AttrCert, ASN1_SEQUENCE, 1), + ASN1_IMP(CMS_CertificateChoices, d.v2AttrCert, ASN1_SEQUENCE, 2), + ASN1_IMP(CMS_CertificateChoices, d.other, CMS_OtherCertificateFormat, 3) +} ASN1_CHOICE_END(CMS_CertificateChoices) + +ASN1_CHOICE(CMS_SignerIdentifier) = { + ASN1_SIMPLE(CMS_SignerIdentifier, d.issuerAndSerialNumber, CMS_IssuerAndSerialNumber), + ASN1_IMP(CMS_SignerIdentifier, d.subjectKeyIdentifier, ASN1_OCTET_STRING, 0) +} ASN1_CHOICE_END(CMS_SignerIdentifier) + +ASN1_NDEF_SEQUENCE(CMS_EncapsulatedContentInfo) = { + ASN1_SIMPLE(CMS_EncapsulatedContentInfo, eContentType, ASN1_OBJECT), + ASN1_NDEF_EXP_OPT(CMS_EncapsulatedContentInfo, eContent, ASN1_OCTET_STRING_NDEF, 0) +} ASN1_NDEF_SEQUENCE_END(CMS_EncapsulatedContentInfo) + +/* Minor tweak to operation: free up signer key, cert */ +static int cms_si_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) + { + if(operation == ASN1_OP_FREE_POST) + { + CMS_SignerInfo *si = (CMS_SignerInfo *)*pval; + if (si->pkey) + EVP_PKEY_free(si->pkey); + if (si->signer) + X509_free(si->signer); + } + return 1; + } + +ASN1_SEQUENCE_cb(CMS_SignerInfo, cms_si_cb) = { + ASN1_SIMPLE(CMS_SignerInfo, version, LONG), + ASN1_SIMPLE(CMS_SignerInfo, sid, CMS_SignerIdentifier), + ASN1_SIMPLE(CMS_SignerInfo, digestAlgorithm, X509_ALGOR), + ASN1_IMP_SET_OF_OPT(CMS_SignerInfo, signedAttrs, X509_ATTRIBUTE, 0), + ASN1_SIMPLE(CMS_SignerInfo, signatureAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_SignerInfo, signature, ASN1_OCTET_STRING), + ASN1_IMP_SET_OF_OPT(CMS_SignerInfo, unsignedAttrs, X509_ATTRIBUTE, 1) +} ASN1_SEQUENCE_END_cb(CMS_SignerInfo, CMS_SignerInfo) + +ASN1_SEQUENCE(CMS_OtherRevocationInfoFormat) = { + ASN1_SIMPLE(CMS_OtherRevocationInfoFormat, otherRevInfoFormat, ASN1_OBJECT), + ASN1_OPT(CMS_OtherRevocationInfoFormat, otherRevInfo, ASN1_ANY) +} ASN1_SEQUENCE_END(CMS_OtherRevocationInfoFormat) + +ASN1_CHOICE(CMS_RevocationInfoChoice) = { + ASN1_SIMPLE(CMS_RevocationInfoChoice, d.crl, X509_CRL), + ASN1_IMP(CMS_RevocationInfoChoice, d.other, CMS_OtherRevocationInfoFormat, 1) +} ASN1_CHOICE_END(CMS_RevocationInfoChoice) + +ASN1_NDEF_SEQUENCE(CMS_SignedData) = { + ASN1_SIMPLE(CMS_SignedData, version, LONG), + ASN1_SET_OF(CMS_SignedData, digestAlgorithms, X509_ALGOR), + ASN1_SIMPLE(CMS_SignedData, encapContentInfo, CMS_EncapsulatedContentInfo), + ASN1_IMP_SET_OF_OPT(CMS_SignedData, certificates, CMS_CertificateChoices, 0), + ASN1_IMP_SET_OF_OPT(CMS_SignedData, crls, CMS_RevocationInfoChoice, 1), + ASN1_SET_OF(CMS_SignedData, signerInfos, CMS_SignerInfo) +} ASN1_NDEF_SEQUENCE_END(CMS_SignedData) + +ASN1_SEQUENCE(CMS_OriginatorInfo) = { + ASN1_IMP_SET_OF_OPT(CMS_OriginatorInfo, certificates, CMS_CertificateChoices, 0), + ASN1_IMP_SET_OF_OPT(CMS_OriginatorInfo, crls, CMS_RevocationInfoChoice, 1) +} ASN1_SEQUENCE_END(CMS_OriginatorInfo) + +ASN1_NDEF_SEQUENCE(CMS_EncryptedContentInfo) = { + ASN1_SIMPLE(CMS_EncryptedContentInfo, contentType, ASN1_OBJECT), + ASN1_SIMPLE(CMS_EncryptedContentInfo, contentEncryptionAlgorithm, X509_ALGOR), + ASN1_IMP_OPT(CMS_EncryptedContentInfo, encryptedContent, ASN1_OCTET_STRING_NDEF, 0) +} ASN1_NDEF_SEQUENCE_END(CMS_EncryptedContentInfo) + +ASN1_SEQUENCE(CMS_KeyTransRecipientInfo) = { + ASN1_SIMPLE(CMS_KeyTransRecipientInfo, version, LONG), + ASN1_SIMPLE(CMS_KeyTransRecipientInfo, rid, CMS_SignerIdentifier), + ASN1_SIMPLE(CMS_KeyTransRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_KeyTransRecipientInfo, encryptedKey, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(CMS_KeyTransRecipientInfo) + +ASN1_SEQUENCE(CMS_OtherKeyAttribute) = { + ASN1_SIMPLE(CMS_OtherKeyAttribute, keyAttrId, ASN1_OBJECT), + ASN1_OPT(CMS_OtherKeyAttribute, keyAttr, ASN1_ANY) +} ASN1_SEQUENCE_END(CMS_OtherKeyAttribute) + +ASN1_SEQUENCE(CMS_RecipientKeyIdentifier) = { + ASN1_SIMPLE(CMS_RecipientKeyIdentifier, subjectKeyIdentifier, ASN1_OCTET_STRING), + ASN1_OPT(CMS_RecipientKeyIdentifier, date, ASN1_GENERALIZEDTIME), + ASN1_OPT(CMS_RecipientKeyIdentifier, other, CMS_OtherKeyAttribute) +} ASN1_SEQUENCE_END(CMS_RecipientKeyIdentifier) + +ASN1_CHOICE(CMS_KeyAgreeRecipientIdentifier) = { + ASN1_SIMPLE(CMS_KeyAgreeRecipientIdentifier, d.issuerAndSerialNumber, CMS_IssuerAndSerialNumber), + ASN1_IMP(CMS_KeyAgreeRecipientIdentifier, d.rKeyId, CMS_RecipientKeyIdentifier, 0) +} ASN1_CHOICE_END(CMS_KeyAgreeRecipientIdentifier) + +ASN1_SEQUENCE(CMS_RecipientEncryptedKey) = { + ASN1_SIMPLE(CMS_RecipientEncryptedKey, rid, CMS_KeyAgreeRecipientIdentifier), + ASN1_SIMPLE(CMS_RecipientEncryptedKey, encryptedKey, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(CMS_RecipientEncryptedKey) + +ASN1_SEQUENCE(CMS_OriginatorPublicKey) = { + ASN1_SIMPLE(CMS_OriginatorPublicKey, algorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_OriginatorPublicKey, publicKey, ASN1_BIT_STRING) +} ASN1_SEQUENCE_END(CMS_OriginatorPublicKey) + +ASN1_CHOICE(CMS_OriginatorIdentifierOrKey) = { + ASN1_SIMPLE(CMS_OriginatorIdentifierOrKey, d.issuerAndSerialNumber, CMS_IssuerAndSerialNumber), + ASN1_IMP(CMS_OriginatorIdentifierOrKey, d.subjectKeyIdentifier, ASN1_OCTET_STRING, 0), + ASN1_IMP(CMS_OriginatorIdentifierOrKey, d.originatorKey, CMS_OriginatorPublicKey, 1) +} ASN1_CHOICE_END(CMS_OriginatorIdentifierOrKey) + +ASN1_SEQUENCE(CMS_KeyAgreeRecipientInfo) = { + ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, version, LONG), + ASN1_EXP(CMS_KeyAgreeRecipientInfo, originator, CMS_OriginatorIdentifierOrKey, 0), + ASN1_EXP_OPT(CMS_KeyAgreeRecipientInfo, ukm, ASN1_OCTET_STRING, 1), + ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), + ASN1_SEQUENCE_OF(CMS_KeyAgreeRecipientInfo, recipientEncryptedKeys, CMS_RecipientEncryptedKey) +} ASN1_SEQUENCE_END(CMS_KeyAgreeRecipientInfo) + +ASN1_SEQUENCE(CMS_KEKIdentifier) = { + ASN1_SIMPLE(CMS_KEKIdentifier, keyIdentifier, ASN1_OCTET_STRING), + ASN1_OPT(CMS_KEKIdentifier, date, ASN1_GENERALIZEDTIME), + ASN1_OPT(CMS_KEKIdentifier, other, CMS_OtherKeyAttribute) +} ASN1_SEQUENCE_END(CMS_KEKIdentifier) + +ASN1_SEQUENCE(CMS_KEKRecipientInfo) = { + ASN1_SIMPLE(CMS_KEKRecipientInfo, version, LONG), + ASN1_SIMPLE(CMS_KEKRecipientInfo, kekid, CMS_KEKIdentifier), + ASN1_SIMPLE(CMS_KEKRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_KEKRecipientInfo, encryptedKey, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(CMS_KEKRecipientInfo) + +ASN1_SEQUENCE(CMS_PasswordRecipientInfo) = { + ASN1_SIMPLE(CMS_PasswordRecipientInfo, version, LONG), + ASN1_IMP_OPT(CMS_PasswordRecipientInfo, keyDerivationAlgorithm, X509_ALGOR, 0), + ASN1_SIMPLE(CMS_PasswordRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_PasswordRecipientInfo, encryptedKey, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(CMS_PasswordRecipientInfo) + +ASN1_SEQUENCE(CMS_OtherRecipientInfo) = { + ASN1_SIMPLE(CMS_OtherRecipientInfo, oriType, ASN1_OBJECT), + ASN1_OPT(CMS_OtherRecipientInfo, oriValue, ASN1_ANY) +} ASN1_SEQUENCE_END(CMS_OtherRecipientInfo) + +/* Free up RecipientInfo additional data */ +static int cms_ri_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) + { + if(operation == ASN1_OP_FREE_PRE) + { + CMS_RecipientInfo *ri = (CMS_RecipientInfo *)*pval; + if (ri->type == CMS_RECIPINFO_TRANS) + { + CMS_KeyTransRecipientInfo *ktri = ri->d.ktri; + if (ktri->pkey) + EVP_PKEY_free(ktri->pkey); + if (ktri->recip) + X509_free(ktri->recip); + } + else if (ri->type == CMS_RECIPINFO_KEK) + { + CMS_KEKRecipientInfo *kekri = ri->d.kekri; + if (kekri->key) + { + OPENSSL_cleanse(kekri->key, kekri->keylen); + OPENSSL_free(kekri->key); + } + } + else if (ri->type == CMS_RECIPINFO_PASS) + { + CMS_PasswordRecipientInfo *pwri = ri->d.pwri; + if (pwri->pass) + { + OPENSSL_cleanse(pwri->pass, pwri->passlen); + OPENSSL_free(pwri->pass); + } + } + } + return 1; + } + +ASN1_CHOICE_cb(CMS_RecipientInfo, cms_ri_cb) = { + ASN1_SIMPLE(CMS_RecipientInfo, d.ktri, CMS_KeyTransRecipientInfo), + ASN1_IMP(CMS_RecipientInfo, d.kari, CMS_KeyAgreeRecipientInfo, 1), + ASN1_IMP(CMS_RecipientInfo, d.kekri, CMS_KEKRecipientInfo, 2), + ASN1_IMP(CMS_RecipientInfo, d.pwri, CMS_PasswordRecipientInfo, 3), + ASN1_IMP(CMS_RecipientInfo, d.ori, CMS_OtherRecipientInfo, 4) +} ASN1_CHOICE_END_cb(CMS_RecipientInfo, CMS_RecipientInfo, type) + +ASN1_NDEF_SEQUENCE(CMS_EnvelopedData) = { + ASN1_SIMPLE(CMS_EnvelopedData, version, LONG), + ASN1_IMP_OPT(CMS_EnvelopedData, originatorInfo, CMS_OriginatorInfo, 0), + ASN1_SET_OF(CMS_EnvelopedData, recipientInfos, CMS_RecipientInfo), + ASN1_SIMPLE(CMS_EnvelopedData, encryptedContentInfo, CMS_EncryptedContentInfo), + ASN1_IMP_SET_OF_OPT(CMS_EnvelopedData, unprotectedAttrs, X509_ATTRIBUTE, 1) +} ASN1_NDEF_SEQUENCE_END(CMS_EnvelopedData) + +ASN1_NDEF_SEQUENCE(CMS_DigestedData) = { + ASN1_SIMPLE(CMS_DigestedData, version, LONG), + ASN1_SIMPLE(CMS_DigestedData, digestAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_DigestedData, encapContentInfo, CMS_EncapsulatedContentInfo), + ASN1_SIMPLE(CMS_DigestedData, digest, ASN1_OCTET_STRING) +} ASN1_NDEF_SEQUENCE_END(CMS_DigestedData) + +ASN1_NDEF_SEQUENCE(CMS_EncryptedData) = { + ASN1_SIMPLE(CMS_EncryptedData, version, LONG), + ASN1_SIMPLE(CMS_EncryptedData, encryptedContentInfo, CMS_EncryptedContentInfo), + ASN1_IMP_SET_OF_OPT(CMS_EncryptedData, unprotectedAttrs, X509_ATTRIBUTE, 1) +} ASN1_NDEF_SEQUENCE_END(CMS_EncryptedData) + +ASN1_NDEF_SEQUENCE(CMS_AuthenticatedData) = { + ASN1_SIMPLE(CMS_AuthenticatedData, version, LONG), + ASN1_IMP_OPT(CMS_AuthenticatedData, originatorInfo, CMS_OriginatorInfo, 0), + ASN1_SET_OF(CMS_AuthenticatedData, recipientInfos, CMS_RecipientInfo), + ASN1_SIMPLE(CMS_AuthenticatedData, macAlgorithm, X509_ALGOR), + ASN1_IMP(CMS_AuthenticatedData, digestAlgorithm, X509_ALGOR, 1), + ASN1_SIMPLE(CMS_AuthenticatedData, encapContentInfo, CMS_EncapsulatedContentInfo), + ASN1_IMP_SET_OF_OPT(CMS_AuthenticatedData, authAttrs, X509_ALGOR, 2), + ASN1_SIMPLE(CMS_AuthenticatedData, mac, ASN1_OCTET_STRING), + ASN1_IMP_SET_OF_OPT(CMS_AuthenticatedData, unauthAttrs, X509_ALGOR, 3) +} ASN1_NDEF_SEQUENCE_END(CMS_AuthenticatedData) + +ASN1_NDEF_SEQUENCE(CMS_CompressedData) = { + ASN1_SIMPLE(CMS_CompressedData, version, LONG), + ASN1_SIMPLE(CMS_CompressedData, compressionAlgorithm, X509_ALGOR), + ASN1_SIMPLE(CMS_CompressedData, encapContentInfo, CMS_EncapsulatedContentInfo), +} ASN1_NDEF_SEQUENCE_END(CMS_CompressedData) + +/* This is the ANY DEFINED BY table for the top level ContentInfo structure */ + +ASN1_ADB_TEMPLATE(cms_default) = ASN1_EXP(CMS_ContentInfo, d.other, ASN1_ANY, 0); + +ASN1_ADB(CMS_ContentInfo) = { + ADB_ENTRY(NID_pkcs7_data, ASN1_NDEF_EXP(CMS_ContentInfo, d.data, ASN1_OCTET_STRING_NDEF, 0)), + ADB_ENTRY(NID_pkcs7_signed, ASN1_NDEF_EXP(CMS_ContentInfo, d.signedData, CMS_SignedData, 0)), + ADB_ENTRY(NID_pkcs7_enveloped, ASN1_NDEF_EXP(CMS_ContentInfo, d.envelopedData, CMS_EnvelopedData, 0)), + ADB_ENTRY(NID_pkcs7_digest, ASN1_NDEF_EXP(CMS_ContentInfo, d.digestedData, CMS_DigestedData, 0)), + ADB_ENTRY(NID_pkcs7_encrypted, ASN1_NDEF_EXP(CMS_ContentInfo, d.encryptedData, CMS_EncryptedData, 0)), + ADB_ENTRY(NID_id_smime_ct_authData, ASN1_NDEF_EXP(CMS_ContentInfo, d.authenticatedData, CMS_AuthenticatedData, 0)), + ADB_ENTRY(NID_id_smime_ct_compressedData, ASN1_NDEF_EXP(CMS_ContentInfo, d.compressedData, CMS_CompressedData, 0)), +} ASN1_ADB_END(CMS_ContentInfo, 0, contentType, 0, &cms_default_tt, NULL); + +/* CMS streaming support */ +static int cms_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) + { + ASN1_STREAM_ARG *sarg = exarg; + CMS_ContentInfo *cms = NULL; + if (pval) + cms = (CMS_ContentInfo *)*pval; + else + return 1; + switch(operation) + { + + case ASN1_OP_STREAM_PRE: + if (CMS_stream(&sarg->boundary, cms) <= 0) + return 0; + case ASN1_OP_DETACHED_PRE: + sarg->ndef_bio = CMS_dataInit(cms, sarg->out); + if (!sarg->ndef_bio) + return 0; + break; + + case ASN1_OP_STREAM_POST: + case ASN1_OP_DETACHED_POST: + if (CMS_dataFinal(cms, sarg->ndef_bio) <= 0) + return 0; + break; + + } + return 1; + } + +ASN1_NDEF_SEQUENCE_cb(CMS_ContentInfo, cms_cb) = { + ASN1_SIMPLE(CMS_ContentInfo, contentType, ASN1_OBJECT), + ASN1_ADB_OBJECT(CMS_ContentInfo) +} ASN1_NDEF_SEQUENCE_END_cb(CMS_ContentInfo, CMS_ContentInfo) + +/* Specials for signed attributes */ + +/* When signing attributes we want to reorder them to match the sorted + * encoding. + */ + +ASN1_ITEM_TEMPLATE(CMS_Attributes_Sign) = + ASN1_EX_TEMPLATE_TYPE(ASN1_TFLG_SET_ORDER, 0, CMS_ATTRIBUTES, X509_ATTRIBUTE) +ASN1_ITEM_TEMPLATE_END(CMS_Attributes_Sign) + +/* When verifying attributes we need to use the received order. So + * we use SEQUENCE OF and tag it to SET OF + */ + +ASN1_ITEM_TEMPLATE(CMS_Attributes_Verify) = + ASN1_EX_TEMPLATE_TYPE(ASN1_TFLG_SEQUENCE_OF | ASN1_TFLG_IMPTAG | ASN1_TFLG_UNIVERSAL, + V_ASN1_SET, CMS_ATTRIBUTES, X509_ATTRIBUTE) +ASN1_ITEM_TEMPLATE_END(CMS_Attributes_Verify) + + + +ASN1_CHOICE(CMS_ReceiptsFrom) = { + ASN1_IMP(CMS_ReceiptsFrom, d.allOrFirstTier, LONG, 0), + ASN1_IMP_SEQUENCE_OF(CMS_ReceiptsFrom, d.receiptList, GENERAL_NAMES, 1) +} ASN1_CHOICE_END(CMS_ReceiptsFrom) + +ASN1_SEQUENCE(CMS_ReceiptRequest) = { + ASN1_SIMPLE(CMS_ReceiptRequest, signedContentIdentifier, ASN1_OCTET_STRING), + ASN1_SIMPLE(CMS_ReceiptRequest, receiptsFrom, CMS_ReceiptsFrom), + ASN1_SEQUENCE_OF(CMS_ReceiptRequest, receiptsTo, GENERAL_NAMES) +} ASN1_SEQUENCE_END(CMS_ReceiptRequest) + +ASN1_SEQUENCE(CMS_Receipt) = { + ASN1_SIMPLE(CMS_Receipt, version, LONG), + ASN1_SIMPLE(CMS_Receipt, contentType, ASN1_OBJECT), + ASN1_SIMPLE(CMS_Receipt, signedContentIdentifier, ASN1_OCTET_STRING), + ASN1_SIMPLE(CMS_Receipt, originatorSignatureValue, ASN1_OCTET_STRING) +} ASN1_SEQUENCE_END(CMS_Receipt) + diff --git a/app/openssl/crypto/cms/cms_att.c b/app/openssl/crypto/cms/cms_att.c new file mode 100644 index 00000000..5b71722e --- /dev/null +++ b/app/openssl/crypto/cms/cms_att.c @@ -0,0 +1,195 @@ +/* crypto/cms/cms_att.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include +#include "cms.h" +#include "cms_lcl.h" + +/* CMS SignedData Attribute utilities */ + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si) +{ + return X509at_get_attr_count(si->signedAttrs); +} + +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos) +{ + return X509at_get_attr_by_NID(si->signedAttrs, nid, lastpos); +} + +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos) +{ + return X509at_get_attr_by_OBJ(si->signedAttrs, obj, lastpos); +} + +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc) +{ + return X509at_get_attr(si->signedAttrs, loc); +} + +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc) +{ + return X509at_delete_attr(si->signedAttrs, loc); +} + +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr) +{ + if(X509at_add1_attr(&si->signedAttrs, attr)) return 1; + return 0; +} + +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_OBJ(&si->signedAttrs, obj, + type, bytes, len)) return 1; + return 0; +} + +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_NID(&si->signedAttrs, nid, + type, bytes, len)) return 1; + return 0; +} + +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_txt(&si->signedAttrs, attrname, + type, bytes, len)) return 1; + return 0; +} + +void *CMS_signed_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type) +{ + return X509at_get0_data_by_OBJ(si->signedAttrs, oid, lastpos, type); +} + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si) +{ + return X509at_get_attr_count(si->unsignedAttrs); +} + +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos) +{ + return X509at_get_attr_by_NID(si->unsignedAttrs, nid, lastpos); +} + +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos) +{ + return X509at_get_attr_by_OBJ(si->unsignedAttrs, obj, lastpos); +} + +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc) +{ + return X509at_get_attr(si->unsignedAttrs, loc); +} + +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc) +{ + return X509at_delete_attr(si->unsignedAttrs, loc); +} + +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr) +{ + if(X509at_add1_attr(&si->unsignedAttrs, attr)) return 1; + return 0; +} + +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_OBJ(&si->unsignedAttrs, obj, + type, bytes, len)) return 1; + return 0; +} + +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_NID(&si->unsignedAttrs, nid, + type, bytes, len)) return 1; + return 0; +} + +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len) +{ + if(X509at_add1_attr_by_txt(&si->unsignedAttrs, attrname, + type, bytes, len)) return 1; + return 0; +} + +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type) +{ + return X509at_get0_data_by_OBJ(si->unsignedAttrs, oid, lastpos, type); +} + +/* Specific attribute cases */ diff --git a/app/openssl/crypto/cms/cms_cd.c b/app/openssl/crypto/cms/cms_cd.c new file mode 100644 index 00000000..20216881 --- /dev/null +++ b/app/openssl/crypto/cms/cms_cd.c @@ -0,0 +1,136 @@ +/* crypto/cms/cms_cd.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#ifndef OPENSSL_NO_COMP +#include +#endif +#include "cms_lcl.h" + +DECLARE_ASN1_ITEM(CMS_CompressedData) + +#ifdef ZLIB + +/* CMS CompressedData Utilities */ + +CMS_ContentInfo *cms_CompressedData_create(int comp_nid) + { + CMS_ContentInfo *cms; + CMS_CompressedData *cd; + /* Will need something cleverer if there is ever more than one + * compression algorithm or parameters have some meaning... + */ + if (comp_nid != NID_zlib_compression) + { + CMSerr(CMS_F_CMS_COMPRESSEDDATA_CREATE, + CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM); + return NULL; + } + cms = CMS_ContentInfo_new(); + if (!cms) + return NULL; + + cd = M_ASN1_new_of(CMS_CompressedData); + + if (!cd) + goto err; + + cms->contentType = OBJ_nid2obj(NID_id_smime_ct_compressedData); + cms->d.compressedData = cd; + + cd->version = 0; + + X509_ALGOR_set0(cd->compressionAlgorithm, + OBJ_nid2obj(NID_zlib_compression), + V_ASN1_UNDEF, NULL); + + cd->encapContentInfo->eContentType = OBJ_nid2obj(NID_pkcs7_data); + + return cms; + + err: + + if (cms) + CMS_ContentInfo_free(cms); + + return NULL; + } + +BIO *cms_CompressedData_init_bio(CMS_ContentInfo *cms) + { + CMS_CompressedData *cd; + ASN1_OBJECT *compoid; + if (OBJ_obj2nid(cms->contentType) != NID_id_smime_ct_compressedData) + { + CMSerr(CMS_F_CMS_COMPRESSEDDATA_INIT_BIO, + CMS_R_CONTENT_TYPE_NOT_COMPRESSED_DATA); + return NULL; + } + cd = cms->d.compressedData; + X509_ALGOR_get0(&compoid, NULL, NULL, cd->compressionAlgorithm); + if (OBJ_obj2nid(compoid) != NID_zlib_compression) + { + CMSerr(CMS_F_CMS_COMPRESSEDDATA_INIT_BIO, + CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM); + return NULL; + } + return BIO_new(BIO_f_zlib()); + } + +#endif diff --git a/app/openssl/crypto/cms/cms_dd.c b/app/openssl/crypto/cms/cms_dd.c new file mode 100644 index 00000000..8919c15b --- /dev/null +++ b/app/openssl/crypto/cms/cms_dd.c @@ -0,0 +1,148 @@ +/* crypto/cms/cms_dd.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include "cms_lcl.h" + +DECLARE_ASN1_ITEM(CMS_DigestedData) + +/* CMS DigestedData Utilities */ + +CMS_ContentInfo *cms_DigestedData_create(const EVP_MD *md) + { + CMS_ContentInfo *cms; + CMS_DigestedData *dd; + cms = CMS_ContentInfo_new(); + if (!cms) + return NULL; + + dd = M_ASN1_new_of(CMS_DigestedData); + + if (!dd) + goto err; + + cms->contentType = OBJ_nid2obj(NID_pkcs7_digest); + cms->d.digestedData = dd; + + dd->version = 0; + dd->encapContentInfo->eContentType = OBJ_nid2obj(NID_pkcs7_data); + + cms_DigestAlgorithm_set(dd->digestAlgorithm, md); + + return cms; + + err: + + if (cms) + CMS_ContentInfo_free(cms); + + return NULL; + } + +BIO *cms_DigestedData_init_bio(CMS_ContentInfo *cms) + { + CMS_DigestedData *dd; + dd = cms->d.digestedData; + return cms_DigestAlgorithm_init_bio(dd->digestAlgorithm); + } + +int cms_DigestedData_do_final(CMS_ContentInfo *cms, BIO *chain, int verify) + { + EVP_MD_CTX mctx; + unsigned char md[EVP_MAX_MD_SIZE]; + unsigned int mdlen; + int r = 0; + CMS_DigestedData *dd; + EVP_MD_CTX_init(&mctx); + + dd = cms->d.digestedData; + + if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, dd->digestAlgorithm)) + goto err; + + if (EVP_DigestFinal_ex(&mctx, md, &mdlen) <= 0) + goto err; + + if (verify) + { + if (mdlen != (unsigned int)dd->digest->length) + { + CMSerr(CMS_F_CMS_DIGESTEDDATA_DO_FINAL, + CMS_R_MESSAGEDIGEST_WRONG_LENGTH); + goto err; + } + + if (memcmp(md, dd->digest->data, mdlen)) + CMSerr(CMS_F_CMS_DIGESTEDDATA_DO_FINAL, + CMS_R_VERIFICATION_FAILURE); + else + r = 1; + } + else + { + if (!ASN1_STRING_set(dd->digest, md, mdlen)) + goto err; + r = 1; + } + + err: + EVP_MD_CTX_cleanup(&mctx); + + return r; + + } diff --git a/app/openssl/crypto/cms/cms_enc.c b/app/openssl/crypto/cms/cms_enc.c new file mode 100644 index 00000000..bebeaf29 --- /dev/null +++ b/app/openssl/crypto/cms/cms_enc.c @@ -0,0 +1,294 @@ +/* crypto/cms/cms_enc.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" + +/* CMS EncryptedData Utilities */ + +DECLARE_ASN1_ITEM(CMS_EncryptedData) + +/* Return BIO based on EncryptedContentInfo and key */ + +BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec) + { + BIO *b; + EVP_CIPHER_CTX *ctx; + const EVP_CIPHER *ciph; + X509_ALGOR *calg = ec->contentEncryptionAlgorithm; + unsigned char iv[EVP_MAX_IV_LENGTH], *piv = NULL; + unsigned char *tkey = NULL; + size_t tkeylen = 0; + + int ok = 0; + + int enc, keep_key = 0; + + enc = ec->cipher ? 1 : 0; + + b = BIO_new(BIO_f_cipher()); + if (!b) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + ERR_R_MALLOC_FAILURE); + return NULL; + } + + BIO_get_cipher_ctx(b, &ctx); + + if (enc) + { + ciph = ec->cipher; + /* If not keeping key set cipher to NULL so subsequent calls + * decrypt. + */ + if (ec->key) + ec->cipher = NULL; + } + else + { + ciph = EVP_get_cipherbyobj(calg->algorithm); + + if (!ciph) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_UNKNOWN_CIPHER); + goto err; + } + } + + if (EVP_CipherInit_ex(ctx, ciph, NULL, NULL, NULL, enc) <= 0) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_CIPHER_INITIALISATION_ERROR); + goto err; + } + + if (enc) + { + int ivlen; + calg->algorithm = OBJ_nid2obj(EVP_CIPHER_CTX_type(ctx)); + /* Generate a random IV if we need one */ + ivlen = EVP_CIPHER_CTX_iv_length(ctx); + if (ivlen > 0) + { + if (RAND_pseudo_bytes(iv, ivlen) <= 0) + goto err; + piv = iv; + } + } + else if (EVP_CIPHER_asn1_to_param(ctx, calg->parameter) <= 0) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); + goto err; + } + tkeylen = EVP_CIPHER_CTX_key_length(ctx); + /* Generate random session key */ + if (!enc || !ec->key) + { + tkey = OPENSSL_malloc(tkeylen); + if (!tkey) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + ERR_R_MALLOC_FAILURE); + goto err; + } + if (EVP_CIPHER_CTX_rand_key(ctx, tkey) <= 0) + goto err; + } + + if (!ec->key) + { + ec->key = tkey; + ec->keylen = tkeylen; + tkey = NULL; + if (enc) + keep_key = 1; + else + ERR_clear_error(); + + } + + if (ec->keylen != tkeylen) + { + /* If necessary set key length */ + if (EVP_CIPHER_CTX_set_key_length(ctx, ec->keylen) <= 0) + { + /* Only reveal failure if debugging so we don't + * leak information which may be useful in MMA. + */ + if (enc || ec->debug) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_INVALID_KEY_LENGTH); + goto err; + } + else + { + /* Use random key */ + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + ec->key = tkey; + ec->keylen = tkeylen; + tkey = NULL; + ERR_clear_error(); + } + } + } + + if (EVP_CipherInit_ex(ctx, NULL, NULL, ec->key, piv, enc) <= 0) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_CIPHER_INITIALISATION_ERROR); + goto err; + } + + if (piv) + { + calg->parameter = ASN1_TYPE_new(); + if (!calg->parameter) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + ERR_R_MALLOC_FAILURE); + goto err; + } + if (EVP_CIPHER_param_to_asn1(ctx, calg->parameter) <= 0) + { + CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, + CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); + goto err; + } + } + ok = 1; + + err: + if (ec->key && !keep_key) + { + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + ec->key = NULL; + } + if (tkey) + { + OPENSSL_cleanse(tkey, tkeylen); + OPENSSL_free(tkey); + } + if (ok) + return b; + BIO_free(b); + return NULL; + } + +int cms_EncryptedContent_init(CMS_EncryptedContentInfo *ec, + const EVP_CIPHER *cipher, + const unsigned char *key, size_t keylen) + { + ec->cipher = cipher; + if (key) + { + ec->key = OPENSSL_malloc(keylen); + if (!ec->key) + return 0; + memcpy(ec->key, key, keylen); + } + ec->keylen = keylen; + if (cipher) + ec->contentType = OBJ_nid2obj(NID_pkcs7_data); + return 1; + } + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen) + { + CMS_EncryptedContentInfo *ec; + if (!key || !keylen) + { + CMSerr(CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY, CMS_R_NO_KEY); + return 0; + } + if (ciph) + { + cms->d.encryptedData = M_ASN1_new_of(CMS_EncryptedData); + if (!cms->d.encryptedData) + { + CMSerr(CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY, + ERR_R_MALLOC_FAILURE); + return 0; + } + cms->contentType = OBJ_nid2obj(NID_pkcs7_encrypted); + cms->d.encryptedData->version = 0; + } + else if (OBJ_obj2nid(cms->contentType) != NID_pkcs7_encrypted) + { + CMSerr(CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY, + CMS_R_NOT_ENCRYPTED_DATA); + return 0; + } + ec = cms->d.encryptedData->encryptedContentInfo; + return cms_EncryptedContent_init(ec, ciph, key, keylen); + } + +BIO *cms_EncryptedData_init_bio(CMS_ContentInfo *cms) + { + CMS_EncryptedData *enc = cms->d.encryptedData; + if (enc->encryptedContentInfo->cipher && enc->unprotectedAttrs) + enc->version = 2; + return cms_EncryptedContent_init_bio(enc->encryptedContentInfo); + } diff --git a/app/openssl/crypto/cms/cms_env.c b/app/openssl/crypto/cms/cms_env.c new file mode 100644 index 00000000..be20b1c0 --- /dev/null +++ b/app/openssl/crypto/cms/cms_env.c @@ -0,0 +1,876 @@ +/* crypto/cms/cms_env.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" +#include "asn1_locl.h" + +/* CMS EnvelopedData Utilities */ + +DECLARE_ASN1_ITEM(CMS_EnvelopedData) +DECLARE_ASN1_ITEM(CMS_KeyTransRecipientInfo) +DECLARE_ASN1_ITEM(CMS_KEKRecipientInfo) +DECLARE_ASN1_ITEM(CMS_OtherKeyAttribute) + +DECLARE_STACK_OF(CMS_RecipientInfo) + +CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms) + { + if (OBJ_obj2nid(cms->contentType) != NID_pkcs7_enveloped) + { + CMSerr(CMS_F_CMS_GET0_ENVELOPED, + CMS_R_CONTENT_TYPE_NOT_ENVELOPED_DATA); + return NULL; + } + return cms->d.envelopedData; + } + +static CMS_EnvelopedData *cms_enveloped_data_init(CMS_ContentInfo *cms) + { + if (cms->d.other == NULL) + { + cms->d.envelopedData = M_ASN1_new_of(CMS_EnvelopedData); + if (!cms->d.envelopedData) + { + CMSerr(CMS_F_CMS_ENVELOPED_DATA_INIT, + ERR_R_MALLOC_FAILURE); + return NULL; + } + cms->d.envelopedData->version = 0; + cms->d.envelopedData->encryptedContentInfo->contentType = + OBJ_nid2obj(NID_pkcs7_data); + ASN1_OBJECT_free(cms->contentType); + cms->contentType = OBJ_nid2obj(NID_pkcs7_enveloped); + return cms->d.envelopedData; + } + return cms_get0_enveloped(cms); + } + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms) + { + CMS_EnvelopedData *env; + env = cms_get0_enveloped(cms); + if (!env) + return NULL; + return env->recipientInfos; + } + +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri) + { + return ri->type; + } + +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher) + { + CMS_ContentInfo *cms; + CMS_EnvelopedData *env; + cms = CMS_ContentInfo_new(); + if (!cms) + goto merr; + env = cms_enveloped_data_init(cms); + if (!env) + goto merr; + if (!cms_EncryptedContent_init(env->encryptedContentInfo, + cipher, NULL, 0)) + goto merr; + return cms; + merr: + if (cms) + CMS_ContentInfo_free(cms); + CMSerr(CMS_F_CMS_ENVELOPEDDATA_CREATE, ERR_R_MALLOC_FAILURE); + return NULL; + } + +/* Key Transport Recipient Info (KTRI) routines */ + +/* Add a recipient certificate. For now only handle key transport. + * If we ever handle key agreement will need updating. + */ + +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags) + { + CMS_RecipientInfo *ri = NULL; + CMS_KeyTransRecipientInfo *ktri; + CMS_EnvelopedData *env; + EVP_PKEY *pk = NULL; + int i, type; + env = cms_get0_enveloped(cms); + if (!env) + goto err; + + /* Initialize recipient info */ + ri = M_ASN1_new_of(CMS_RecipientInfo); + if (!ri) + goto merr; + + /* Initialize and add key transport recipient info */ + + ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo); + if (!ri->d.ktri) + goto merr; + ri->type = CMS_RECIPINFO_TRANS; + + ktri = ri->d.ktri; + + X509_check_purpose(recip, -1, -1); + pk = X509_get_pubkey(recip); + if (!pk) + { + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, + CMS_R_ERROR_GETTING_PUBLIC_KEY); + goto err; + } + CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509); + ktri->pkey = pk; + ktri->recip = recip; + + if (flags & CMS_USE_KEYID) + { + ktri->version = 2; + type = CMS_RECIPINFO_KEYIDENTIFIER; + } + else + { + ktri->version = 0; + type = CMS_RECIPINFO_ISSUER_SERIAL; + } + + /* Not a typo: RecipientIdentifier and SignerIdentifier are the + * same structure. + */ + + if (!cms_set1_SignerIdentifier(ktri->rid, recip, type)) + goto err; + + if (pk->ameth && pk->ameth->pkey_ctrl) + { + i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_ENVELOPE, + 0, ri); + if (i == -2) + { + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + goto err; + } + if (i <= 0) + { + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, + CMS_R_CTRL_FAILURE); + goto err; + } + } + + if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri)) + goto merr; + + return ri; + + merr: + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, ERR_R_MALLOC_FAILURE); + err: + if (ri) + M_ASN1_free_of(ri, CMS_RecipientInfo); + return NULL; + + } + +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg) + { + CMS_KeyTransRecipientInfo *ktri; + if (ri->type != CMS_RECIPINFO_TRANS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS, + CMS_R_NOT_KEY_TRANSPORT); + return 0; + } + + ktri = ri->d.ktri; + + if (pk) + *pk = ktri->pkey; + if (recip) + *recip = ktri->recip; + if (palg) + *palg = ktri->keyEncryptionAlgorithm; + return 1; + } + +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno) + { + CMS_KeyTransRecipientInfo *ktri; + if (ri->type != CMS_RECIPINFO_TRANS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID, + CMS_R_NOT_KEY_TRANSPORT); + return 0; + } + ktri = ri->d.ktri; + + return cms_SignerIdentifier_get0_signer_id(ktri->rid, + keyid, issuer, sno); + } + +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert) + { + if (ri->type != CMS_RECIPINFO_TRANS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_CERT_CMP, + CMS_R_NOT_KEY_TRANSPORT); + return -2; + } + return cms_SignerIdentifier_cert_cmp(ri->d.ktri->rid, cert); + } + +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey) + { + if (ri->type != CMS_RECIPINFO_TRANS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY, + CMS_R_NOT_KEY_TRANSPORT); + return 0; + } + ri->d.ktri->pkey = pkey; + return 1; + } + +/* Encrypt content key in key transport recipient info */ + +static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) + { + CMS_KeyTransRecipientInfo *ktri; + CMS_EncryptedContentInfo *ec; + EVP_PKEY_CTX *pctx = NULL; + unsigned char *ek = NULL; + size_t eklen; + + int ret = 0; + + if (ri->type != CMS_RECIPINFO_TRANS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT, + CMS_R_NOT_KEY_TRANSPORT); + return 0; + } + ktri = ri->d.ktri; + ec = cms->d.envelopedData->encryptedContentInfo; + + pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!pctx) + return 0; + + if (EVP_PKEY_encrypt_init(pctx) <= 0) + goto err; + + if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_ENCRYPT, + EVP_PKEY_CTRL_CMS_ENCRYPT, 0, ri) <= 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT, CMS_R_CTRL_ERROR); + goto err; + } + + if (EVP_PKEY_encrypt(pctx, NULL, &eklen, ec->key, ec->keylen) <= 0) + goto err; + + ek = OPENSSL_malloc(eklen); + + if (ek == NULL) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT, + ERR_R_MALLOC_FAILURE); + goto err; + } + + if (EVP_PKEY_encrypt(pctx, ek, &eklen, ec->key, ec->keylen) <= 0) + goto err; + + ASN1_STRING_set0(ktri->encryptedKey, ek, eklen); + ek = NULL; + + ret = 1; + + err: + if (pctx) + EVP_PKEY_CTX_free(pctx); + if (ek) + OPENSSL_free(ek); + return ret; + + } + +/* Decrypt content key from KTRI */ + +static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) + { + CMS_KeyTransRecipientInfo *ktri = ri->d.ktri; + EVP_PKEY_CTX *pctx = NULL; + unsigned char *ek = NULL; + size_t eklen; + int ret = 0; + CMS_EncryptedContentInfo *ec; + ec = cms->d.envelopedData->encryptedContentInfo; + + if (ktri->pkey == NULL) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, + CMS_R_NO_PRIVATE_KEY); + return 0; + } + + pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!pctx) + return 0; + + if (EVP_PKEY_decrypt_init(pctx) <= 0) + goto err; + + if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_DECRYPT, + EVP_PKEY_CTRL_CMS_DECRYPT, 0, ri) <= 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CTRL_ERROR); + goto err; + } + + if (EVP_PKEY_decrypt(pctx, NULL, &eklen, + ktri->encryptedKey->data, + ktri->encryptedKey->length) <= 0) + goto err; + + ek = OPENSSL_malloc(eklen); + + if (ek == NULL) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, + ERR_R_MALLOC_FAILURE); + goto err; + } + + if (EVP_PKEY_decrypt(pctx, ek, &eklen, + ktri->encryptedKey->data, + ktri->encryptedKey->length) <= 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CMS_LIB); + goto err; + } + + ret = 1; + + if (ec->key) + { + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + } + + ec->key = ek; + ec->keylen = eklen; + + err: + if (pctx) + EVP_PKEY_CTX_free(pctx); + if (!ret && ek) + OPENSSL_free(ek); + + return ret; + } + +/* Key Encrypted Key (KEK) RecipientInfo routines */ + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen) + { + ASN1_OCTET_STRING tmp_os; + CMS_KEKRecipientInfo *kekri; + if (ri->type != CMS_RECIPINFO_KEK) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_ID_CMP, CMS_R_NOT_KEK); + return -2; + } + kekri = ri->d.kekri; + tmp_os.type = V_ASN1_OCTET_STRING; + tmp_os.flags = 0; + tmp_os.data = (unsigned char *)id; + tmp_os.length = (int)idlen; + return ASN1_OCTET_STRING_cmp(&tmp_os, kekri->kekid->keyIdentifier); + } + +/* For now hard code AES key wrap info */ + +static size_t aes_wrap_keylen(int nid) + { + switch (nid) + { + case NID_id_aes128_wrap: + return 16; + + case NID_id_aes192_wrap: + return 24; + + case NID_id_aes256_wrap: + return 32; + + default: + return 0; + } + } + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType) + { + CMS_RecipientInfo *ri = NULL; + CMS_EnvelopedData *env; + CMS_KEKRecipientInfo *kekri; + env = cms_get0_enveloped(cms); + if (!env) + goto err; + + if (nid == NID_undef) + { + switch (keylen) + { + case 16: + nid = NID_id_aes128_wrap; + break; + + case 24: + nid = NID_id_aes192_wrap; + break; + + case 32: + nid = NID_id_aes256_wrap; + break; + + default: + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_KEY, + CMS_R_INVALID_KEY_LENGTH); + goto err; + } + + } + else + { + + size_t exp_keylen = aes_wrap_keylen(nid); + + if (!exp_keylen) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_KEY, + CMS_R_UNSUPPORTED_KEK_ALGORITHM); + goto err; + } + + if (keylen != exp_keylen) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_KEY, + CMS_R_INVALID_KEY_LENGTH); + goto err; + } + + } + + /* Initialize recipient info */ + ri = M_ASN1_new_of(CMS_RecipientInfo); + if (!ri) + goto merr; + + ri->d.kekri = M_ASN1_new_of(CMS_KEKRecipientInfo); + if (!ri->d.kekri) + goto merr; + ri->type = CMS_RECIPINFO_KEK; + + kekri = ri->d.kekri; + + if (otherTypeId) + { + kekri->kekid->other = M_ASN1_new_of(CMS_OtherKeyAttribute); + if (kekri->kekid->other == NULL) + goto merr; + } + + if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri)) + goto merr; + + + /* After this point no calls can fail */ + + kekri->version = 4; + + kekri->key = key; + kekri->keylen = keylen; + + ASN1_STRING_set0(kekri->kekid->keyIdentifier, id, idlen); + + kekri->kekid->date = date; + + if (kekri->kekid->other) + { + kekri->kekid->other->keyAttrId = otherTypeId; + kekri->kekid->other->keyAttr = otherType; + } + + X509_ALGOR_set0(kekri->keyEncryptionAlgorithm, + OBJ_nid2obj(nid), V_ASN1_UNDEF, NULL); + + return ri; + + merr: + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_KEY, ERR_R_MALLOC_FAILURE); + err: + if (ri) + M_ASN1_free_of(ri, CMS_RecipientInfo); + return NULL; + + } + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype) + { + CMS_KEKIdentifier *rkid; + if (ri->type != CMS_RECIPINFO_KEK) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID, CMS_R_NOT_KEK); + return 0; + } + rkid = ri->d.kekri->kekid; + if (palg) + *palg = ri->d.kekri->keyEncryptionAlgorithm; + if (pid) + *pid = rkid->keyIdentifier; + if (pdate) + *pdate = rkid->date; + if (potherid) + { + if (rkid->other) + *potherid = rkid->other->keyAttrId; + else + *potherid = NULL; + } + if (pothertype) + { + if (rkid->other) + *pothertype = rkid->other->keyAttr; + else + *pothertype = NULL; + } + return 1; + } + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen) + { + CMS_KEKRecipientInfo *kekri; + if (ri->type != CMS_RECIPINFO_KEK) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_SET0_KEY, CMS_R_NOT_KEK); + return 0; + } + + kekri = ri->d.kekri; + kekri->key = key; + kekri->keylen = keylen; + return 1; + } + + +/* Encrypt content key in KEK recipient info */ + +static int cms_RecipientInfo_kekri_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) + { + CMS_EncryptedContentInfo *ec; + CMS_KEKRecipientInfo *kekri; + AES_KEY actx; + unsigned char *wkey = NULL; + int wkeylen; + int r = 0; + + ec = cms->d.envelopedData->encryptedContentInfo; + + kekri = ri->d.kekri; + + if (!kekri->key) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT, CMS_R_NO_KEY); + return 0; + } + + if (AES_set_encrypt_key(kekri->key, kekri->keylen << 3, &actx)) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT, + CMS_R_ERROR_SETTING_KEY); + goto err; + } + + wkey = OPENSSL_malloc(ec->keylen + 8); + + if (!wkey) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT, + ERR_R_MALLOC_FAILURE); + goto err; + } + + wkeylen = AES_wrap_key(&actx, NULL, wkey, ec->key, ec->keylen); + + if (wkeylen <= 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT, CMS_R_WRAP_ERROR); + goto err; + } + + ASN1_STRING_set0(kekri->encryptedKey, wkey, wkeylen); + + r = 1; + + err: + + if (!r && wkey) + OPENSSL_free(wkey); + OPENSSL_cleanse(&actx, sizeof(actx)); + + return r; + + } + +/* Decrypt content key in KEK recipient info */ + +static int cms_RecipientInfo_kekri_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) + { + CMS_EncryptedContentInfo *ec; + CMS_KEKRecipientInfo *kekri; + AES_KEY actx; + unsigned char *ukey = NULL; + int ukeylen; + int r = 0, wrap_nid; + + ec = cms->d.envelopedData->encryptedContentInfo; + + kekri = ri->d.kekri; + + if (!kekri->key) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, CMS_R_NO_KEY); + return 0; + } + + wrap_nid = OBJ_obj2nid(kekri->keyEncryptionAlgorithm->algorithm); + if (aes_wrap_keylen(wrap_nid) != kekri->keylen) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, + CMS_R_INVALID_KEY_LENGTH); + return 0; + } + + /* If encrypted key length is invalid don't bother */ + + if (kekri->encryptedKey->length < 16) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, + CMS_R_INVALID_ENCRYPTED_KEY_LENGTH); + goto err; + } + + if (AES_set_decrypt_key(kekri->key, kekri->keylen << 3, &actx)) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, + CMS_R_ERROR_SETTING_KEY); + goto err; + } + + ukey = OPENSSL_malloc(kekri->encryptedKey->length - 8); + + if (!ukey) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, + ERR_R_MALLOC_FAILURE); + goto err; + } + + ukeylen = AES_unwrap_key(&actx, NULL, ukey, + kekri->encryptedKey->data, + kekri->encryptedKey->length); + + if (ukeylen <= 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT, + CMS_R_UNWRAP_ERROR); + goto err; + } + + ec->key = ukey; + ec->keylen = ukeylen; + + r = 1; + + err: + + if (!r && ukey) + OPENSSL_free(ukey); + OPENSSL_cleanse(&actx, sizeof(actx)); + + return r; + + } + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) + { + switch(ri->type) + { + case CMS_RECIPINFO_TRANS: + return cms_RecipientInfo_ktri_decrypt(cms, ri); + + case CMS_RECIPINFO_KEK: + return cms_RecipientInfo_kekri_decrypt(cms, ri); + + case CMS_RECIPINFO_PASS: + return cms_RecipientInfo_pwri_crypt(cms, ri, 0); + + default: + CMSerr(CMS_F_CMS_RECIPIENTINFO_DECRYPT, + CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE); + return 0; + } + } + +BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) + { + CMS_EncryptedContentInfo *ec; + STACK_OF(CMS_RecipientInfo) *rinfos; + CMS_RecipientInfo *ri; + int i, r, ok = 0; + BIO *ret; + + /* Get BIO first to set up key */ + + ec = cms->d.envelopedData->encryptedContentInfo; + ret = cms_EncryptedContent_init_bio(ec); + + /* If error or no cipher end of processing */ + + if (!ret || !ec->cipher) + return ret; + + /* Now encrypt content key according to each RecipientInfo type */ + + rinfos = cms->d.envelopedData->recipientInfos; + + for (i = 0; i < sk_CMS_RecipientInfo_num(rinfos); i++) + { + ri = sk_CMS_RecipientInfo_value(rinfos, i); + + switch (ri->type) + { + case CMS_RECIPINFO_TRANS: + r = cms_RecipientInfo_ktri_encrypt(cms, ri); + break; + + case CMS_RECIPINFO_KEK: + r = cms_RecipientInfo_kekri_encrypt(cms, ri); + break; + + case CMS_RECIPINFO_PASS: + r = cms_RecipientInfo_pwri_crypt(cms, ri, 1); + break; + + default: + CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, + CMS_R_UNSUPPORTED_RECIPIENT_TYPE); + goto err; + } + + if (r <= 0) + { + CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, + CMS_R_ERROR_SETTING_RECIPIENTINFO); + goto err; + } + } + + ok = 1; + + err: + ec->cipher = NULL; + if (ec->key) + { + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + ec->key = NULL; + ec->keylen = 0; + } + if (ok) + return ret; + BIO_free(ret); + return NULL; + + } diff --git a/app/openssl/crypto/cms/cms_err.c b/app/openssl/crypto/cms/cms_err.c new file mode 100644 index 00000000..8330ead7 --- /dev/null +++ b/app/openssl/crypto/cms/cms_err.c @@ -0,0 +1,245 @@ +/* crypto/cms/cms_err.c */ +/* ==================================================================== + * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +/* NOTE: this file was auto generated by the mkerr.pl script: any changes + * made to it will be overwritten when the script next updates this file, + * only reason strings will be preserved. + */ + +#include +#include +#include + +/* BEGIN ERROR CODES */ +#ifndef OPENSSL_NO_ERR + +#define ERR_FUNC(func) ERR_PACK(ERR_LIB_CMS,func,0) +#define ERR_REASON(reason) ERR_PACK(ERR_LIB_CMS,0,reason) + +static ERR_STRING_DATA CMS_str_functs[]= + { +{ERR_FUNC(CMS_F_CHECK_CONTENT), "CHECK_CONTENT"}, +{ERR_FUNC(CMS_F_CMS_ADD0_CERT), "CMS_add0_cert"}, +{ERR_FUNC(CMS_F_CMS_ADD0_RECIPIENT_KEY), "CMS_add0_recipient_key"}, +{ERR_FUNC(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD), "CMS_add0_recipient_password"}, +{ERR_FUNC(CMS_F_CMS_ADD1_RECEIPTREQUEST), "CMS_add1_ReceiptRequest"}, +{ERR_FUNC(CMS_F_CMS_ADD1_RECIPIENT_CERT), "CMS_add1_recipient_cert"}, +{ERR_FUNC(CMS_F_CMS_ADD1_SIGNER), "CMS_add1_signer"}, +{ERR_FUNC(CMS_F_CMS_ADD1_SIGNINGTIME), "CMS_ADD1_SIGNINGTIME"}, +{ERR_FUNC(CMS_F_CMS_COMPRESS), "CMS_compress"}, +{ERR_FUNC(CMS_F_CMS_COMPRESSEDDATA_CREATE), "cms_CompressedData_create"}, +{ERR_FUNC(CMS_F_CMS_COMPRESSEDDATA_INIT_BIO), "cms_CompressedData_init_bio"}, +{ERR_FUNC(CMS_F_CMS_COPY_CONTENT), "CMS_COPY_CONTENT"}, +{ERR_FUNC(CMS_F_CMS_COPY_MESSAGEDIGEST), "CMS_COPY_MESSAGEDIGEST"}, +{ERR_FUNC(CMS_F_CMS_DATA), "CMS_data"}, +{ERR_FUNC(CMS_F_CMS_DATAFINAL), "CMS_dataFinal"}, +{ERR_FUNC(CMS_F_CMS_DATAINIT), "CMS_dataInit"}, +{ERR_FUNC(CMS_F_CMS_DECRYPT), "CMS_decrypt"}, +{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_KEY), "CMS_decrypt_set1_key"}, +{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_PASSWORD), "CMS_decrypt_set1_password"}, +{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_PKEY), "CMS_decrypt_set1_pkey"}, +{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_FIND_CTX), "cms_DigestAlgorithm_find_ctx"}, +{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_INIT_BIO), "cms_DigestAlgorithm_init_bio"}, +{ERR_FUNC(CMS_F_CMS_DIGESTEDDATA_DO_FINAL), "cms_DigestedData_do_final"}, +{ERR_FUNC(CMS_F_CMS_DIGEST_VERIFY), "CMS_digest_verify"}, +{ERR_FUNC(CMS_F_CMS_ENCODE_RECEIPT), "cms_encode_Receipt"}, +{ERR_FUNC(CMS_F_CMS_ENCRYPT), "CMS_encrypt"}, +{ERR_FUNC(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO), "cms_EncryptedContent_init_bio"}, +{ERR_FUNC(CMS_F_CMS_ENCRYPTEDDATA_DECRYPT), "CMS_EncryptedData_decrypt"}, +{ERR_FUNC(CMS_F_CMS_ENCRYPTEDDATA_ENCRYPT), "CMS_EncryptedData_encrypt"}, +{ERR_FUNC(CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY), "CMS_EncryptedData_set1_key"}, +{ERR_FUNC(CMS_F_CMS_ENVELOPEDDATA_CREATE), "CMS_EnvelopedData_create"}, +{ERR_FUNC(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO), "cms_EnvelopedData_init_bio"}, +{ERR_FUNC(CMS_F_CMS_ENVELOPED_DATA_INIT), "CMS_ENVELOPED_DATA_INIT"}, +{ERR_FUNC(CMS_F_CMS_FINAL), "CMS_final"}, +{ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES), "CMS_GET0_CERTIFICATE_CHOICES"}, +{ERR_FUNC(CMS_F_CMS_GET0_CONTENT), "CMS_get0_content"}, +{ERR_FUNC(CMS_F_CMS_GET0_ECONTENT_TYPE), "CMS_GET0_ECONTENT_TYPE"}, +{ERR_FUNC(CMS_F_CMS_GET0_ENVELOPED), "cms_get0_enveloped"}, +{ERR_FUNC(CMS_F_CMS_GET0_REVOCATION_CHOICES), "CMS_GET0_REVOCATION_CHOICES"}, +{ERR_FUNC(CMS_F_CMS_GET0_SIGNED), "CMS_GET0_SIGNED"}, +{ERR_FUNC(CMS_F_CMS_MSGSIGDIGEST_ADD1), "cms_msgSigDigest_add1"}, +{ERR_FUNC(CMS_F_CMS_RECEIPTREQUEST_CREATE0), "CMS_ReceiptRequest_create0"}, +{ERR_FUNC(CMS_F_CMS_RECEIPT_VERIFY), "cms_Receipt_verify"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_DECRYPT), "CMS_RecipientInfo_decrypt"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT), "CMS_RECIPIENTINFO_KEKRI_DECRYPT"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT), "CMS_RECIPIENTINFO_KEKRI_ENCRYPT"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID), "CMS_RecipientInfo_kekri_get0_id"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_ID_CMP), "CMS_RecipientInfo_kekri_id_cmp"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_CERT_CMP), "CMS_RecipientInfo_ktri_cert_cmp"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT), "CMS_RECIPIENTINFO_KTRI_DECRYPT"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT), "CMS_RECIPIENTINFO_KTRI_ENCRYPT"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS), "CMS_RecipientInfo_ktri_get0_algs"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID), "CMS_RecipientInfo_ktri_get0_signer_id"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT), "cms_RecipientInfo_pwri_crypt"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_KEY), "CMS_RecipientInfo_set0_key"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD), "CMS_RecipientInfo_set0_password"}, +{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY), "CMS_RecipientInfo_set0_pkey"}, +{ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"}, +{ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"}, +{ERR_FUNC(CMS_F_CMS_SIGN), "CMS_sign"}, +{ERR_FUNC(CMS_F_CMS_SIGNED_DATA_INIT), "CMS_SIGNED_DATA_INIT"}, +{ERR_FUNC(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN), "CMS_SIGNERINFO_CONTENT_SIGN"}, +{ERR_FUNC(CMS_F_CMS_SIGNERINFO_SIGN), "CMS_SignerInfo_sign"}, +{ERR_FUNC(CMS_F_CMS_SIGNERINFO_VERIFY), "CMS_SignerInfo_verify"}, +{ERR_FUNC(CMS_F_CMS_SIGNERINFO_VERIFY_CERT), "CMS_SIGNERINFO_VERIFY_CERT"}, +{ERR_FUNC(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT), "CMS_SignerInfo_verify_content"}, +{ERR_FUNC(CMS_F_CMS_SIGN_RECEIPT), "CMS_sign_receipt"}, +{ERR_FUNC(CMS_F_CMS_STREAM), "CMS_stream"}, +{ERR_FUNC(CMS_F_CMS_UNCOMPRESS), "CMS_uncompress"}, +{ERR_FUNC(CMS_F_CMS_VERIFY), "CMS_verify"}, +{0,NULL} + }; + +static ERR_STRING_DATA CMS_str_reasons[]= + { +{ERR_REASON(CMS_R_ADD_SIGNER_ERROR) ,"add signer error"}, +{ERR_REASON(CMS_R_CERTIFICATE_ALREADY_PRESENT),"certificate already present"}, +{ERR_REASON(CMS_R_CERTIFICATE_HAS_NO_KEYID),"certificate has no keyid"}, +{ERR_REASON(CMS_R_CERTIFICATE_VERIFY_ERROR),"certificate verify error"}, +{ERR_REASON(CMS_R_CIPHER_INITIALISATION_ERROR),"cipher initialisation error"}, +{ERR_REASON(CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR),"cipher parameter initialisation error"}, +{ERR_REASON(CMS_R_CMS_DATAFINAL_ERROR) ,"cms datafinal error"}, +{ERR_REASON(CMS_R_CMS_LIB) ,"cms lib"}, +{ERR_REASON(CMS_R_CONTENTIDENTIFIER_MISMATCH),"contentidentifier mismatch"}, +{ERR_REASON(CMS_R_CONTENT_NOT_FOUND) ,"content not found"}, +{ERR_REASON(CMS_R_CONTENT_TYPE_MISMATCH) ,"content type mismatch"}, +{ERR_REASON(CMS_R_CONTENT_TYPE_NOT_COMPRESSED_DATA),"content type not compressed data"}, +{ERR_REASON(CMS_R_CONTENT_TYPE_NOT_ENVELOPED_DATA),"content type not enveloped data"}, +{ERR_REASON(CMS_R_CONTENT_TYPE_NOT_SIGNED_DATA),"content type not signed data"}, +{ERR_REASON(CMS_R_CONTENT_VERIFY_ERROR) ,"content verify error"}, +{ERR_REASON(CMS_R_CTRL_ERROR) ,"ctrl error"}, +{ERR_REASON(CMS_R_CTRL_FAILURE) ,"ctrl failure"}, +{ERR_REASON(CMS_R_DECRYPT_ERROR) ,"decrypt error"}, +{ERR_REASON(CMS_R_DIGEST_ERROR) ,"digest error"}, +{ERR_REASON(CMS_R_ERROR_GETTING_PUBLIC_KEY),"error getting public key"}, +{ERR_REASON(CMS_R_ERROR_READING_MESSAGEDIGEST_ATTRIBUTE),"error reading messagedigest attribute"}, +{ERR_REASON(CMS_R_ERROR_SETTING_KEY) ,"error setting key"}, +{ERR_REASON(CMS_R_ERROR_SETTING_RECIPIENTINFO),"error setting recipientinfo"}, +{ERR_REASON(CMS_R_INVALID_ENCRYPTED_KEY_LENGTH),"invalid encrypted key length"}, +{ERR_REASON(CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER),"invalid key encryption parameter"}, +{ERR_REASON(CMS_R_INVALID_KEY_LENGTH) ,"invalid key length"}, +{ERR_REASON(CMS_R_MD_BIO_INIT_ERROR) ,"md bio init error"}, +{ERR_REASON(CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH),"messagedigest attribute wrong length"}, +{ERR_REASON(CMS_R_MESSAGEDIGEST_WRONG_LENGTH),"messagedigest wrong length"}, +{ERR_REASON(CMS_R_MSGSIGDIGEST_ERROR) ,"msgsigdigest error"}, +{ERR_REASON(CMS_R_MSGSIGDIGEST_VERIFICATION_FAILURE),"msgsigdigest verification failure"}, +{ERR_REASON(CMS_R_MSGSIGDIGEST_WRONG_LENGTH),"msgsigdigest wrong length"}, +{ERR_REASON(CMS_R_NEED_ONE_SIGNER) ,"need one signer"}, +{ERR_REASON(CMS_R_NOT_A_SIGNED_RECEIPT) ,"not a signed receipt"}, +{ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA) ,"not encrypted data"}, +{ERR_REASON(CMS_R_NOT_KEK) ,"not kek"}, +{ERR_REASON(CMS_R_NOT_KEY_TRANSPORT) ,"not key transport"}, +{ERR_REASON(CMS_R_NOT_PWRI) ,"not pwri"}, +{ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE),"not supported for this key type"}, +{ERR_REASON(CMS_R_NO_CIPHER) ,"no cipher"}, +{ERR_REASON(CMS_R_NO_CONTENT) ,"no content"}, +{ERR_REASON(CMS_R_NO_CONTENT_TYPE) ,"no content type"}, +{ERR_REASON(CMS_R_NO_DEFAULT_DIGEST) ,"no default digest"}, +{ERR_REASON(CMS_R_NO_DIGEST_SET) ,"no digest set"}, +{ERR_REASON(CMS_R_NO_KEY) ,"no key"}, +{ERR_REASON(CMS_R_NO_KEY_OR_CERT) ,"no key or cert"}, +{ERR_REASON(CMS_R_NO_MATCHING_DIGEST) ,"no matching digest"}, +{ERR_REASON(CMS_R_NO_MATCHING_RECIPIENT) ,"no matching recipient"}, +{ERR_REASON(CMS_R_NO_MATCHING_SIGNATURE) ,"no matching signature"}, +{ERR_REASON(CMS_R_NO_MSGSIGDIGEST) ,"no msgsigdigest"}, +{ERR_REASON(CMS_R_NO_PASSWORD) ,"no password"}, +{ERR_REASON(CMS_R_NO_PRIVATE_KEY) ,"no private key"}, +{ERR_REASON(CMS_R_NO_PUBLIC_KEY) ,"no public key"}, +{ERR_REASON(CMS_R_NO_RECEIPT_REQUEST) ,"no receipt request"}, +{ERR_REASON(CMS_R_NO_SIGNERS) ,"no signers"}, +{ERR_REASON(CMS_R_PRIVATE_KEY_DOES_NOT_MATCH_CERTIFICATE),"private key does not match certificate"}, +{ERR_REASON(CMS_R_RECEIPT_DECODE_ERROR) ,"receipt decode error"}, +{ERR_REASON(CMS_R_RECIPIENT_ERROR) ,"recipient error"}, +{ERR_REASON(CMS_R_SIGNER_CERTIFICATE_NOT_FOUND),"signer certificate not found"}, +{ERR_REASON(CMS_R_SIGNFINAL_ERROR) ,"signfinal error"}, +{ERR_REASON(CMS_R_SMIME_TEXT_ERROR) ,"smime text error"}, +{ERR_REASON(CMS_R_STORE_INIT_ERROR) ,"store init error"}, +{ERR_REASON(CMS_R_TYPE_NOT_COMPRESSED_DATA),"type not compressed data"}, +{ERR_REASON(CMS_R_TYPE_NOT_DATA) ,"type not data"}, +{ERR_REASON(CMS_R_TYPE_NOT_DIGESTED_DATA),"type not digested data"}, +{ERR_REASON(CMS_R_TYPE_NOT_ENCRYPTED_DATA),"type not encrypted data"}, +{ERR_REASON(CMS_R_TYPE_NOT_ENVELOPED_DATA),"type not enveloped data"}, +{ERR_REASON(CMS_R_UNABLE_TO_FINALIZE_CONTEXT),"unable to finalize context"}, +{ERR_REASON(CMS_R_UNKNOWN_CIPHER) ,"unknown cipher"}, +{ERR_REASON(CMS_R_UNKNOWN_DIGEST_ALGORIHM),"unknown digest algorihm"}, +{ERR_REASON(CMS_R_UNKNOWN_ID) ,"unknown id"}, +{ERR_REASON(CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM),"unsupported compression algorithm"}, +{ERR_REASON(CMS_R_UNSUPPORTED_CONTENT_TYPE),"unsupported content type"}, +{ERR_REASON(CMS_R_UNSUPPORTED_KEK_ALGORITHM),"unsupported kek algorithm"}, +{ERR_REASON(CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM),"unsupported key encryption algorithm"}, +{ERR_REASON(CMS_R_UNSUPPORTED_RECIPIENT_TYPE),"unsupported recipient type"}, +{ERR_REASON(CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE),"unsupported recpientinfo type"}, +{ERR_REASON(CMS_R_UNSUPPORTED_TYPE) ,"unsupported type"}, +{ERR_REASON(CMS_R_UNWRAP_ERROR) ,"unwrap error"}, +{ERR_REASON(CMS_R_UNWRAP_FAILURE) ,"unwrap failure"}, +{ERR_REASON(CMS_R_VERIFICATION_FAILURE) ,"verification failure"}, +{ERR_REASON(CMS_R_WRAP_ERROR) ,"wrap error"}, +{0,NULL} + }; + +#endif + +void ERR_load_CMS_strings(void) + { +#ifndef OPENSSL_NO_ERR + + if (ERR_func_error_string(CMS_str_functs[0].error) == NULL) + { + ERR_load_strings(0,CMS_str_functs); + ERR_load_strings(0,CMS_str_reasons); + } +#endif + } diff --git a/app/openssl/crypto/cms/cms_ess.c b/app/openssl/crypto/cms/cms_ess.c new file mode 100644 index 00000000..90c0b82f --- /dev/null +++ b/app/openssl/crypto/cms/cms_ess.c @@ -0,0 +1,420 @@ +/* crypto/cms/cms_ess.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" + +DECLARE_ASN1_ITEM(CMS_ReceiptRequest) +DECLARE_ASN1_ITEM(CMS_Receipt) + +IMPLEMENT_ASN1_FUNCTIONS(CMS_ReceiptRequest) + +/* ESS services: for now just Signed Receipt related */ + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr) + { + ASN1_STRING *str; + CMS_ReceiptRequest *rr = NULL; + if (prr) + *prr = NULL; + str = CMS_signed_get0_data_by_OBJ(si, + OBJ_nid2obj(NID_id_smime_aa_receiptRequest), + -3, V_ASN1_SEQUENCE); + if (!str) + return 0; + + rr = ASN1_item_unpack(str, ASN1_ITEM_rptr(CMS_ReceiptRequest)); + if (!rr) + return -1; + if (prr) + *prr = rr; + else + CMS_ReceiptRequest_free(rr); + return 1; + } + +CMS_ReceiptRequest *CMS_ReceiptRequest_create0(unsigned char *id, int idlen, + int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo) + { + CMS_ReceiptRequest *rr = NULL; + + rr = CMS_ReceiptRequest_new(); + if (!rr) + goto merr; + if (id) + ASN1_STRING_set0(rr->signedContentIdentifier, id, idlen); + else + { + if (!ASN1_STRING_set(rr->signedContentIdentifier, NULL, 32)) + goto merr; + if (RAND_pseudo_bytes(rr->signedContentIdentifier->data, 32) + <= 0) + goto err; + } + + sk_GENERAL_NAMES_pop_free(rr->receiptsTo, GENERAL_NAMES_free); + rr->receiptsTo = receiptsTo; + + if (receiptList) + { + rr->receiptsFrom->type = 1; + rr->receiptsFrom->d.receiptList = receiptList; + } + else + { + rr->receiptsFrom->type = 0; + rr->receiptsFrom->d.allOrFirstTier = allorfirst; + } + + return rr; + + merr: + CMSerr(CMS_F_CMS_RECEIPTREQUEST_CREATE0, ERR_R_MALLOC_FAILURE); + + err: + if (rr) + CMS_ReceiptRequest_free(rr); + + return NULL; + + } + +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr) + { + unsigned char *rrder = NULL; + int rrderlen, r = 0; + + rrderlen = i2d_CMS_ReceiptRequest(rr, &rrder); + if (rrderlen < 0) + goto merr; + + if (!CMS_signed_add1_attr_by_NID(si, NID_id_smime_aa_receiptRequest, + V_ASN1_SEQUENCE, rrder, rrderlen)) + goto merr; + + r = 1; + + merr: + if (!r) + CMSerr(CMS_F_CMS_ADD1_RECEIPTREQUEST, ERR_R_MALLOC_FAILURE); + + if (rrder) + OPENSSL_free(rrder); + + return r; + + } + +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto) + { + if (pcid) + *pcid = rr->signedContentIdentifier; + if (rr->receiptsFrom->type == 0) + { + if (pallorfirst) + *pallorfirst = (int)rr->receiptsFrom->d.allOrFirstTier; + if (plist) + *plist = NULL; + } + else + { + if (pallorfirst) + *pallorfirst = -1; + if (plist) + *plist = rr->receiptsFrom->d.receiptList; + } + if (prto) + *prto = rr->receiptsTo; + } + +/* Digest a SignerInfo structure for msgSigDigest attribute processing */ + +static int cms_msgSigDigest(CMS_SignerInfo *si, + unsigned char *dig, unsigned int *diglen) + { + const EVP_MD *md; + md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm); + if (md == NULL) + return 0; + if (!ASN1_item_digest(ASN1_ITEM_rptr(CMS_Attributes_Verify), md, + si->signedAttrs, dig, diglen)) + return 0; + return 1; + } + +/* Add a msgSigDigest attribute to a SignerInfo */ + +int cms_msgSigDigest_add1(CMS_SignerInfo *dest, CMS_SignerInfo *src) + { + unsigned char dig[EVP_MAX_MD_SIZE]; + unsigned int diglen; + if (!cms_msgSigDigest(src, dig, &diglen)) + { + CMSerr(CMS_F_CMS_MSGSIGDIGEST_ADD1, CMS_R_MSGSIGDIGEST_ERROR); + return 0; + } + if (!CMS_signed_add1_attr_by_NID(dest, NID_id_smime_aa_msgSigDigest, + V_ASN1_OCTET_STRING, dig, diglen)) + { + CMSerr(CMS_F_CMS_MSGSIGDIGEST_ADD1, ERR_R_MALLOC_FAILURE); + return 0; + } + return 1; + } + +/* Verify signed receipt after it has already passed normal CMS verify */ + +int cms_Receipt_verify(CMS_ContentInfo *cms, CMS_ContentInfo *req_cms) + { + int r = 0, i; + CMS_ReceiptRequest *rr = NULL; + CMS_Receipt *rct = NULL; + STACK_OF(CMS_SignerInfo) *sis, *osis; + CMS_SignerInfo *si, *osi = NULL; + ASN1_OCTET_STRING *msig, **pcont; + ASN1_OBJECT *octype; + unsigned char dig[EVP_MAX_MD_SIZE]; + unsigned int diglen; + + /* Get SignerInfos, also checks SignedData content type */ + osis = CMS_get0_SignerInfos(req_cms); + sis = CMS_get0_SignerInfos(cms); + if (!osis || !sis) + goto err; + + if (sk_CMS_SignerInfo_num(sis) != 1) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NEED_ONE_SIGNER); + goto err; + } + + /* Check receipt content type */ + if (OBJ_obj2nid(CMS_get0_eContentType(cms)) != NID_id_smime_ct_receipt) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NOT_A_SIGNED_RECEIPT); + goto err; + } + + /* Extract and decode receipt content */ + pcont = CMS_get0_content(cms); + if (!pcont || !*pcont) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_CONTENT); + goto err; + } + + rct = ASN1_item_unpack(*pcont, ASN1_ITEM_rptr(CMS_Receipt)); + + if (!rct) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_RECEIPT_DECODE_ERROR); + goto err; + } + + /* Locate original request */ + + for (i = 0; i < sk_CMS_SignerInfo_num(osis); i++) + { + osi = sk_CMS_SignerInfo_value(osis, i); + if (!ASN1_STRING_cmp(osi->signature, + rct->originatorSignatureValue)) + break; + } + + if (i == sk_CMS_SignerInfo_num(osis)) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_MATCHING_SIGNATURE); + goto err; + } + + si = sk_CMS_SignerInfo_value(sis, 0); + + /* Get msgSigDigest value and compare */ + + msig = CMS_signed_get0_data_by_OBJ(si, + OBJ_nid2obj(NID_id_smime_aa_msgSigDigest), + -3, V_ASN1_OCTET_STRING); + + if (!msig) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_MSGSIGDIGEST); + goto err; + } + + if (!cms_msgSigDigest(osi, dig, &diglen)) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_MSGSIGDIGEST_ERROR); + goto err; + } + + if (diglen != (unsigned int)msig->length) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, + CMS_R_MSGSIGDIGEST_WRONG_LENGTH); + goto err; + } + + if (memcmp(dig, msig->data, diglen)) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, + CMS_R_MSGSIGDIGEST_VERIFICATION_FAILURE); + goto err; + } + + /* Compare content types */ + + octype = CMS_signed_get0_data_by_OBJ(osi, + OBJ_nid2obj(NID_pkcs9_contentType), + -3, V_ASN1_OBJECT); + if (!octype) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_CONTENT_TYPE); + goto err; + } + + /* Compare details in receipt request */ + + if (OBJ_cmp(octype, rct->contentType)) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_CONTENT_TYPE_MISMATCH); + goto err; + } + + /* Get original receipt request details */ + + if (CMS_get1_ReceiptRequest(osi, &rr) <= 0) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, CMS_R_NO_RECEIPT_REQUEST); + goto err; + } + + if (ASN1_STRING_cmp(rr->signedContentIdentifier, + rct->signedContentIdentifier)) + { + CMSerr(CMS_F_CMS_RECEIPT_VERIFY, + CMS_R_CONTENTIDENTIFIER_MISMATCH); + goto err; + } + + r = 1; + + err: + if (rr) + CMS_ReceiptRequest_free(rr); + if (rct) + M_ASN1_free_of(rct, CMS_Receipt); + + return r; + + } + +/* Encode a Receipt into an OCTET STRING read for including into content of + * a SignedData ContentInfo. + */ + +ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si) + { + CMS_Receipt rct; + CMS_ReceiptRequest *rr = NULL; + ASN1_OBJECT *ctype; + ASN1_OCTET_STRING *os = NULL; + + /* Get original receipt request */ + + /* Get original receipt request details */ + + if (CMS_get1_ReceiptRequest(si, &rr) <= 0) + { + CMSerr(CMS_F_CMS_ENCODE_RECEIPT, CMS_R_NO_RECEIPT_REQUEST); + goto err; + } + + /* Get original content type */ + + ctype = CMS_signed_get0_data_by_OBJ(si, + OBJ_nid2obj(NID_pkcs9_contentType), + -3, V_ASN1_OBJECT); + if (!ctype) + { + CMSerr(CMS_F_CMS_ENCODE_RECEIPT, CMS_R_NO_CONTENT_TYPE); + goto err; + } + + rct.version = 1; + rct.contentType = ctype; + rct.signedContentIdentifier = rr->signedContentIdentifier; + rct.originatorSignatureValue = si->signature; + + os = ASN1_item_pack(&rct, ASN1_ITEM_rptr(CMS_Receipt), NULL); + + err: + if (rr) + CMS_ReceiptRequest_free(rr); + + return os; + + } + + diff --git a/app/openssl/crypto/cms/cms_io.c b/app/openssl/crypto/cms/cms_io.c new file mode 100644 index 00000000..1cb0264c --- /dev/null +++ b/app/openssl/crypto/cms/cms_io.c @@ -0,0 +1,133 @@ +/* crypto/cms/cms_io.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include +#include "cms.h" +#include "cms_lcl.h" + +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms) + { + ASN1_OCTET_STRING **pos; + pos = CMS_get0_content(cms); + if (!pos) + return 0; + if (!*pos) + *pos = ASN1_OCTET_STRING_new(); + if (*pos) + { + (*pos)->flags |= ASN1_STRING_FLAG_NDEF; + (*pos)->flags &= ~ASN1_STRING_FLAG_CONT; + *boundary = &(*pos)->data; + return 1; + } + CMSerr(CMS_F_CMS_STREAM, ERR_R_MALLOC_FAILURE); + return 0; + } + +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms) + { + return ASN1_item_d2i_bio(ASN1_ITEM_rptr(CMS_ContentInfo), bp, cms); + } + +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms) + { + return ASN1_item_i2d_bio(ASN1_ITEM_rptr(CMS_ContentInfo), bp, cms); + } + +IMPLEMENT_PEM_rw_const(CMS, CMS_ContentInfo, PEM_STRING_CMS, CMS_ContentInfo) + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms) + { + return BIO_new_NDEF(out, (ASN1_VALUE *)cms, + ASN1_ITEM_rptr(CMS_ContentInfo)); + } + +/* CMS wrappers round generalised stream and MIME routines */ + +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags) + { + return i2d_ASN1_bio_stream(out, (ASN1_VALUE *)cms, in, flags, + ASN1_ITEM_rptr(CMS_ContentInfo)); + } + +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags) + { + return PEM_write_bio_ASN1_stream(out, (ASN1_VALUE *) cms, in, flags, + "CMS", + ASN1_ITEM_rptr(CMS_ContentInfo)); + } + +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags) + { + STACK_OF(X509_ALGOR) *mdalgs; + int ctype_nid = OBJ_obj2nid(cms->contentType); + int econt_nid = OBJ_obj2nid(CMS_get0_eContentType(cms)); + if (ctype_nid == NID_pkcs7_signed) + mdalgs = cms->d.signedData->digestAlgorithms; + else + mdalgs = NULL; + + return SMIME_write_ASN1(bio, (ASN1_VALUE *)cms, data, flags, + ctype_nid, econt_nid, mdalgs, + ASN1_ITEM_rptr(CMS_ContentInfo)); + } + +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont) + { + return (CMS_ContentInfo *)SMIME_read_ASN1(bio, bcont, + ASN1_ITEM_rptr(CMS_ContentInfo)); + } + diff --git a/app/openssl/crypto/cms/cms_lcl.h b/app/openssl/crypto/cms/cms_lcl.h new file mode 100644 index 00000000..a9f97301 --- /dev/null +++ b/app/openssl/crypto/cms/cms_lcl.h @@ -0,0 +1,473 @@ +/* crypto/cms/cms_lcl.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#ifndef HEADER_CMS_LCL_H +#define HEADER_CMS_LCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Cryptographic message syntax (CMS) structures: taken + * from RFC3852 + */ + +/* Forward references */ + +typedef struct CMS_IssuerAndSerialNumber_st CMS_IssuerAndSerialNumber; +typedef struct CMS_EncapsulatedContentInfo_st CMS_EncapsulatedContentInfo; +typedef struct CMS_SignerIdentifier_st CMS_SignerIdentifier; +typedef struct CMS_SignedData_st CMS_SignedData; +typedef struct CMS_OtherRevocationInfoFormat_st CMS_OtherRevocationInfoFormat; +typedef struct CMS_OriginatorInfo_st CMS_OriginatorInfo; +typedef struct CMS_EncryptedContentInfo_st CMS_EncryptedContentInfo; +typedef struct CMS_EnvelopedData_st CMS_EnvelopedData; +typedef struct CMS_DigestedData_st CMS_DigestedData; +typedef struct CMS_EncryptedData_st CMS_EncryptedData; +typedef struct CMS_AuthenticatedData_st CMS_AuthenticatedData; +typedef struct CMS_CompressedData_st CMS_CompressedData; +typedef struct CMS_OtherCertificateFormat_st CMS_OtherCertificateFormat; +typedef struct CMS_KeyTransRecipientInfo_st CMS_KeyTransRecipientInfo; +typedef struct CMS_OriginatorPublicKey_st CMS_OriginatorPublicKey; +typedef struct CMS_OriginatorIdentifierOrKey_st CMS_OriginatorIdentifierOrKey; +typedef struct CMS_KeyAgreeRecipientInfo_st CMS_KeyAgreeRecipientInfo; +typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; +typedef struct CMS_RecipientKeyIdentifier_st CMS_RecipientKeyIdentifier; +typedef struct CMS_KeyAgreeRecipientIdentifier_st CMS_KeyAgreeRecipientIdentifier; +typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; +typedef struct CMS_KEKIdentifier_st CMS_KEKIdentifier; +typedef struct CMS_KEKRecipientInfo_st CMS_KEKRecipientInfo; +typedef struct CMS_PasswordRecipientInfo_st CMS_PasswordRecipientInfo; +typedef struct CMS_OtherRecipientInfo_st CMS_OtherRecipientInfo; +typedef struct CMS_ReceiptsFrom_st CMS_ReceiptsFrom; + +struct CMS_ContentInfo_st + { + ASN1_OBJECT *contentType; + union { + ASN1_OCTET_STRING *data; + CMS_SignedData *signedData; + CMS_EnvelopedData *envelopedData; + CMS_DigestedData *digestedData; + CMS_EncryptedData *encryptedData; + CMS_AuthenticatedData *authenticatedData; + CMS_CompressedData *compressedData; + ASN1_TYPE *other; + /* Other types ... */ + void *otherData; + } d; + }; + +struct CMS_SignedData_st + { + long version; + STACK_OF(X509_ALGOR) *digestAlgorithms; + CMS_EncapsulatedContentInfo *encapContentInfo; + STACK_OF(CMS_CertificateChoices) *certificates; + STACK_OF(CMS_RevocationInfoChoice) *crls; + STACK_OF(CMS_SignerInfo) *signerInfos; + }; + +struct CMS_EncapsulatedContentInfo_st + { + ASN1_OBJECT *eContentType; + ASN1_OCTET_STRING *eContent; + /* Set to 1 if incomplete structure only part set up */ + int partial; + }; + +struct CMS_SignerInfo_st + { + long version; + CMS_SignerIdentifier *sid; + X509_ALGOR *digestAlgorithm; + STACK_OF(X509_ATTRIBUTE) *signedAttrs; + X509_ALGOR *signatureAlgorithm; + ASN1_OCTET_STRING *signature; + STACK_OF(X509_ATTRIBUTE) *unsignedAttrs; + /* Signing certificate and key */ + X509 *signer; + EVP_PKEY *pkey; + }; + +struct CMS_SignerIdentifier_st + { + int type; + union { + CMS_IssuerAndSerialNumber *issuerAndSerialNumber; + ASN1_OCTET_STRING *subjectKeyIdentifier; + } d; + }; + +struct CMS_EnvelopedData_st + { + long version; + CMS_OriginatorInfo *originatorInfo; + STACK_OF(CMS_RecipientInfo) *recipientInfos; + CMS_EncryptedContentInfo *encryptedContentInfo; + STACK_OF(X509_ATTRIBUTE) *unprotectedAttrs; + }; + +struct CMS_OriginatorInfo_st + { + STACK_OF(CMS_CertificateChoices) *certificates; + STACK_OF(CMS_RevocationInfoChoice) *crls; + }; + +struct CMS_EncryptedContentInfo_st + { + ASN1_OBJECT *contentType; + X509_ALGOR *contentEncryptionAlgorithm; + ASN1_OCTET_STRING *encryptedContent; + /* Content encryption algorithm and key */ + const EVP_CIPHER *cipher; + unsigned char *key; + size_t keylen; + /* Set to 1 if we are debugging decrypt and don't fake keys for MMA */ + int debug; + }; + +struct CMS_RecipientInfo_st + { + int type; + union { + CMS_KeyTransRecipientInfo *ktri; + CMS_KeyAgreeRecipientInfo *kari; + CMS_KEKRecipientInfo *kekri; + CMS_PasswordRecipientInfo *pwri; + CMS_OtherRecipientInfo *ori; + } d; + }; + +typedef CMS_SignerIdentifier CMS_RecipientIdentifier; + +struct CMS_KeyTransRecipientInfo_st + { + long version; + CMS_RecipientIdentifier *rid; + X509_ALGOR *keyEncryptionAlgorithm; + ASN1_OCTET_STRING *encryptedKey; + /* Recipient Key and cert */ + X509 *recip; + EVP_PKEY *pkey; + }; + +struct CMS_KeyAgreeRecipientInfo_st + { + long version; + CMS_OriginatorIdentifierOrKey *originator; + ASN1_OCTET_STRING *ukm; + X509_ALGOR *keyEncryptionAlgorithm; + STACK_OF(CMS_RecipientEncryptedKey) *recipientEncryptedKeys; + }; + +struct CMS_OriginatorIdentifierOrKey_st + { + int type; + union { + CMS_IssuerAndSerialNumber *issuerAndSerialNumber; + ASN1_OCTET_STRING *subjectKeyIdentifier; + CMS_OriginatorPublicKey *originatorKey; + } d; + }; + +struct CMS_OriginatorPublicKey_st + { + X509_ALGOR *algorithm; + ASN1_BIT_STRING *publicKey; + }; + +struct CMS_RecipientEncryptedKey_st + { + CMS_KeyAgreeRecipientIdentifier *rid; + ASN1_OCTET_STRING *encryptedKey; + }; + +struct CMS_KeyAgreeRecipientIdentifier_st + { + int type; + union { + CMS_IssuerAndSerialNumber *issuerAndSerialNumber; + CMS_RecipientKeyIdentifier *rKeyId; + } d; + }; + +struct CMS_RecipientKeyIdentifier_st + { + ASN1_OCTET_STRING *subjectKeyIdentifier; + ASN1_GENERALIZEDTIME *date; + CMS_OtherKeyAttribute *other; + }; + +struct CMS_KEKRecipientInfo_st + { + long version; + CMS_KEKIdentifier *kekid; + X509_ALGOR *keyEncryptionAlgorithm; + ASN1_OCTET_STRING *encryptedKey; + /* Extra info: symmetric key to use */ + unsigned char *key; + size_t keylen; + }; + +struct CMS_KEKIdentifier_st + { + ASN1_OCTET_STRING *keyIdentifier; + ASN1_GENERALIZEDTIME *date; + CMS_OtherKeyAttribute *other; + }; + +struct CMS_PasswordRecipientInfo_st + { + long version; + X509_ALGOR *keyDerivationAlgorithm; + X509_ALGOR *keyEncryptionAlgorithm; + ASN1_OCTET_STRING *encryptedKey; + /* Extra info: password to use */ + unsigned char *pass; + size_t passlen; + }; + +struct CMS_OtherRecipientInfo_st + { + ASN1_OBJECT *oriType; + ASN1_TYPE *oriValue; + }; + +struct CMS_DigestedData_st + { + long version; + X509_ALGOR *digestAlgorithm; + CMS_EncapsulatedContentInfo *encapContentInfo; + ASN1_OCTET_STRING *digest; + }; + +struct CMS_EncryptedData_st + { + long version; + CMS_EncryptedContentInfo *encryptedContentInfo; + STACK_OF(X509_ATTRIBUTE) *unprotectedAttrs; + }; + +struct CMS_AuthenticatedData_st + { + long version; + CMS_OriginatorInfo *originatorInfo; + STACK_OF(CMS_RecipientInfo) *recipientInfos; + X509_ALGOR *macAlgorithm; + X509_ALGOR *digestAlgorithm; + CMS_EncapsulatedContentInfo *encapContentInfo; + STACK_OF(X509_ATTRIBUTE) *authAttrs; + ASN1_OCTET_STRING *mac; + STACK_OF(X509_ATTRIBUTE) *unauthAttrs; + }; + +struct CMS_CompressedData_st + { + long version; + X509_ALGOR *compressionAlgorithm; + STACK_OF(CMS_RecipientInfo) *recipientInfos; + CMS_EncapsulatedContentInfo *encapContentInfo; + }; + +struct CMS_RevocationInfoChoice_st + { + int type; + union { + X509_CRL *crl; + CMS_OtherRevocationInfoFormat *other; + } d; + }; + +#define CMS_REVCHOICE_CRL 0 +#define CMS_REVCHOICE_OTHER 1 + +struct CMS_OtherRevocationInfoFormat_st + { + ASN1_OBJECT *otherRevInfoFormat; + ASN1_TYPE *otherRevInfo; + }; + +struct CMS_CertificateChoices + { + int type; + union { + X509 *certificate; + ASN1_STRING *extendedCertificate; /* Obsolete */ + ASN1_STRING *v1AttrCert; /* Left encoded for now */ + ASN1_STRING *v2AttrCert; /* Left encoded for now */ + CMS_OtherCertificateFormat *other; + } d; + }; + +#define CMS_CERTCHOICE_CERT 0 +#define CMS_CERTCHOICE_EXCERT 1 +#define CMS_CERTCHOICE_V1ACERT 2 +#define CMS_CERTCHOICE_V2ACERT 3 +#define CMS_CERTCHOICE_OTHER 4 + +struct CMS_OtherCertificateFormat_st + { + ASN1_OBJECT *otherCertFormat; + ASN1_TYPE *otherCert; + }; + +/* This is also defined in pkcs7.h but we duplicate it + * to allow the CMS code to be independent of PKCS#7 + */ + +struct CMS_IssuerAndSerialNumber_st + { + X509_NAME *issuer; + ASN1_INTEGER *serialNumber; + }; + +struct CMS_OtherKeyAttribute_st + { + ASN1_OBJECT *keyAttrId; + ASN1_TYPE *keyAttr; + }; + +/* ESS structures */ + +#ifdef HEADER_X509V3_H + +struct CMS_ReceiptRequest_st + { + ASN1_OCTET_STRING *signedContentIdentifier; + CMS_ReceiptsFrom *receiptsFrom; + STACK_OF(GENERAL_NAMES) *receiptsTo; + }; + + +struct CMS_ReceiptsFrom_st + { + int type; + union + { + long allOrFirstTier; + STACK_OF(GENERAL_NAMES) *receiptList; + } d; + }; +#endif + +struct CMS_Receipt_st + { + long version; + ASN1_OBJECT *contentType; + ASN1_OCTET_STRING *signedContentIdentifier; + ASN1_OCTET_STRING *originatorSignatureValue; + }; + +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_ITEM(CMS_SignerInfo) +DECLARE_ASN1_ITEM(CMS_IssuerAndSerialNumber) +DECLARE_ASN1_ITEM(CMS_Attributes_Sign) +DECLARE_ASN1_ITEM(CMS_Attributes_Verify) +DECLARE_ASN1_ITEM(CMS_RecipientInfo) +DECLARE_ASN1_ITEM(CMS_PasswordRecipientInfo) +DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber) + +#define CMS_SIGNERINFO_ISSUER_SERIAL 0 +#define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +#define CMS_RECIPINFO_ISSUER_SERIAL 0 +#define CMS_RECIPINFO_KEYIDENTIFIER 1 + +BIO *cms_content_bio(CMS_ContentInfo *cms); + +CMS_ContentInfo *cms_Data_create(void); + +CMS_ContentInfo *cms_DigestedData_create(const EVP_MD *md); +BIO *cms_DigestedData_init_bio(CMS_ContentInfo *cms); +int cms_DigestedData_do_final(CMS_ContentInfo *cms, BIO *chain, int verify); + +BIO *cms_SignedData_init_bio(CMS_ContentInfo *cms); +int cms_SignedData_final(CMS_ContentInfo *cms, BIO *chain); +int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type); +int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int cms_SignerIdentifier_cert_cmp(CMS_SignerIdentifier *sid, X509 *cert); + +CMS_ContentInfo *cms_CompressedData_create(int comp_nid); +BIO *cms_CompressedData_init_bio(CMS_ContentInfo *cms); + +void cms_DigestAlgorithm_set(X509_ALGOR *alg, const EVP_MD *md); +BIO *cms_DigestAlgorithm_init_bio(X509_ALGOR *digestAlgorithm); +int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain, + X509_ALGOR *mdalg); + +BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec); +BIO *cms_EncryptedData_init_bio(CMS_ContentInfo *cms); +int cms_EncryptedContent_init(CMS_EncryptedContentInfo *ec, + const EVP_CIPHER *cipher, + const unsigned char *key, size_t keylen); + +int cms_Receipt_verify(CMS_ContentInfo *cms, CMS_ContentInfo *req_cms); +int cms_msgSigDigest_add1(CMS_SignerInfo *dest, CMS_SignerInfo *src); +ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si); + +BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms); +CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms); + +/* PWRI routines */ +int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, + int en_de); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/app/openssl/crypto/cms/cms_lib.c b/app/openssl/crypto/cms/cms_lib.c new file mode 100644 index 00000000..ba08279a --- /dev/null +++ b/app/openssl/crypto/cms/cms_lib.c @@ -0,0 +1,622 @@ +/* crypto/cms/cms_lib.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include +#include +#include +#include "cms.h" +#include "cms_lcl.h" + +IMPLEMENT_ASN1_FUNCTIONS(CMS_ContentInfo) +IMPLEMENT_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +DECLARE_ASN1_ITEM(CMS_CertificateChoices) +DECLARE_ASN1_ITEM(CMS_RevocationInfoChoice) +DECLARE_STACK_OF(CMS_CertificateChoices) +DECLARE_STACK_OF(CMS_RevocationInfoChoice) + +const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms) + { + return cms->contentType; + } + +CMS_ContentInfo *cms_Data_create(void) + { + CMS_ContentInfo *cms; + cms = CMS_ContentInfo_new(); + if (cms) + { + cms->contentType = OBJ_nid2obj(NID_pkcs7_data); + /* Never detached */ + CMS_set_detached(cms, 0); + } + return cms; + } + +BIO *cms_content_bio(CMS_ContentInfo *cms) + { + ASN1_OCTET_STRING **pos = CMS_get0_content(cms); + if (!pos) + return NULL; + /* If content detached data goes nowhere: create NULL BIO */ + if (!*pos) + return BIO_new(BIO_s_null()); + /* If content not detached and created return memory BIO + */ + if (!*pos || ((*pos)->flags == ASN1_STRING_FLAG_CONT)) + return BIO_new(BIO_s_mem()); + /* Else content was read in: return read only BIO for it */ + return BIO_new_mem_buf((*pos)->data, (*pos)->length); + } + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont) + { + BIO *cmsbio, *cont; + if (icont) + cont = icont; + else + cont = cms_content_bio(cms); + if (!cont) + { + CMSerr(CMS_F_CMS_DATAINIT, CMS_R_NO_CONTENT); + return NULL; + } + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_data: + return cont; + + case NID_pkcs7_signed: + cmsbio = cms_SignedData_init_bio(cms); + break; + + case NID_pkcs7_digest: + cmsbio = cms_DigestedData_init_bio(cms); + break; +#ifdef ZLIB + case NID_id_smime_ct_compressedData: + cmsbio = cms_CompressedData_init_bio(cms); + break; +#endif + + case NID_pkcs7_encrypted: + cmsbio = cms_EncryptedData_init_bio(cms); + break; + + case NID_pkcs7_enveloped: + cmsbio = cms_EnvelopedData_init_bio(cms); + break; + + default: + CMSerr(CMS_F_CMS_DATAINIT, CMS_R_UNSUPPORTED_TYPE); + return NULL; + } + + if (cmsbio) + return BIO_push(cmsbio, cont); + + if (!icont) + BIO_free(cont); + return NULL; + + } + +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *cmsbio) + { + ASN1_OCTET_STRING **pos = CMS_get0_content(cms); + if (!pos) + return 0; + /* If ebmedded content find memory BIO and set content */ + if (*pos && ((*pos)->flags & ASN1_STRING_FLAG_CONT)) + { + BIO *mbio; + unsigned char *cont; + long contlen; + mbio = BIO_find_type(cmsbio, BIO_TYPE_MEM); + if (!mbio) + { + CMSerr(CMS_F_CMS_DATAFINAL, CMS_R_CONTENT_NOT_FOUND); + return 0; + } + contlen = BIO_get_mem_data(mbio, &cont); + /* Set bio as read only so its content can't be clobbered */ + BIO_set_flags(mbio, BIO_FLAGS_MEM_RDONLY); + BIO_set_mem_eof_return(mbio, 0); + ASN1_STRING_set0(*pos, cont, contlen); + (*pos)->flags &= ~ASN1_STRING_FLAG_CONT; + } + + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_data: + case NID_pkcs7_enveloped: + case NID_pkcs7_encrypted: + case NID_id_smime_ct_compressedData: + /* Nothing to do */ + return 1; + + case NID_pkcs7_signed: + return cms_SignedData_final(cms, cmsbio); + + case NID_pkcs7_digest: + return cms_DigestedData_do_final(cms, cmsbio, 0); + + default: + CMSerr(CMS_F_CMS_DATAFINAL, CMS_R_UNSUPPORTED_TYPE); + return 0; + } + } + +/* Return an OCTET STRING pointer to content. This allows it to + * be accessed or set later. + */ + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms) + { + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_data: + return &cms->d.data; + + case NID_pkcs7_signed: + return &cms->d.signedData->encapContentInfo->eContent; + + case NID_pkcs7_enveloped: + return &cms->d.envelopedData->encryptedContentInfo->encryptedContent; + + case NID_pkcs7_digest: + return &cms->d.digestedData->encapContentInfo->eContent; + + case NID_pkcs7_encrypted: + return &cms->d.encryptedData->encryptedContentInfo->encryptedContent; + + case NID_id_smime_ct_authData: + return &cms->d.authenticatedData->encapContentInfo->eContent; + + case NID_id_smime_ct_compressedData: + return &cms->d.compressedData->encapContentInfo->eContent; + + default: + if (cms->d.other->type == V_ASN1_OCTET_STRING) + return &cms->d.other->value.octet_string; + CMSerr(CMS_F_CMS_GET0_CONTENT, CMS_R_UNSUPPORTED_CONTENT_TYPE); + return NULL; + + } + } + +/* Return an ASN1_OBJECT pointer to content type. This allows it to + * be accessed or set later. + */ + +static ASN1_OBJECT **cms_get0_econtent_type(CMS_ContentInfo *cms) + { + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_signed: + return &cms->d.signedData->encapContentInfo->eContentType; + + case NID_pkcs7_enveloped: + return &cms->d.envelopedData->encryptedContentInfo->contentType; + + case NID_pkcs7_digest: + return &cms->d.digestedData->encapContentInfo->eContentType; + + case NID_pkcs7_encrypted: + return &cms->d.encryptedData->encryptedContentInfo->contentType; + + case NID_id_smime_ct_authData: + return &cms->d.authenticatedData->encapContentInfo->eContentType; + + case NID_id_smime_ct_compressedData: + return &cms->d.compressedData->encapContentInfo->eContentType; + + default: + CMSerr(CMS_F_CMS_GET0_ECONTENT_TYPE, + CMS_R_UNSUPPORTED_CONTENT_TYPE); + return NULL; + + } + } + +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms) + { + ASN1_OBJECT **petype; + petype = cms_get0_econtent_type(cms); + if (petype) + return *petype; + return NULL; + } + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid) + { + ASN1_OBJECT **petype, *etype; + petype = cms_get0_econtent_type(cms); + if (!petype) + return 0; + if (!oid) + return 1; + etype = OBJ_dup(oid); + if (!etype) + return 0; + ASN1_OBJECT_free(*petype); + *petype = etype; + return 1; + } + +int CMS_is_detached(CMS_ContentInfo *cms) + { + ASN1_OCTET_STRING **pos; + pos = CMS_get0_content(cms); + if (!pos) + return -1; + if (*pos) + return 0; + return 1; + } + +int CMS_set_detached(CMS_ContentInfo *cms, int detached) + { + ASN1_OCTET_STRING **pos; + pos = CMS_get0_content(cms); + if (!pos) + return 0; + if (detached) + { + if (*pos) + { + ASN1_OCTET_STRING_free(*pos); + *pos = NULL; + } + return 1; + } + if (!*pos) + *pos = ASN1_OCTET_STRING_new(); + if (*pos) + { + /* NB: special flag to show content is created and not + * read in. + */ + (*pos)->flags |= ASN1_STRING_FLAG_CONT; + return 1; + } + CMSerr(CMS_F_CMS_SET_DETACHED, ERR_R_MALLOC_FAILURE); + return 0; + } + +/* Set up an X509_ALGOR DigestAlgorithmIdentifier from an EVP_MD */ + +void cms_DigestAlgorithm_set(X509_ALGOR *alg, const EVP_MD *md) + { + int param_type; + + if (md->flags & EVP_MD_FLAG_DIGALGID_ABSENT) + param_type = V_ASN1_UNDEF; + else + param_type = V_ASN1_NULL; + + X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); + + } + +/* Create a digest BIO from an X509_ALGOR structure */ + +BIO *cms_DigestAlgorithm_init_bio(X509_ALGOR *digestAlgorithm) + { + BIO *mdbio = NULL; + ASN1_OBJECT *digestoid; + const EVP_MD *digest; + X509_ALGOR_get0(&digestoid, NULL, NULL, digestAlgorithm); + digest = EVP_get_digestbyobj(digestoid); + if (!digest) + { + CMSerr(CMS_F_CMS_DIGESTALGORITHM_INIT_BIO, + CMS_R_UNKNOWN_DIGEST_ALGORIHM); + goto err; + } + mdbio = BIO_new(BIO_f_md()); + if (!mdbio || !BIO_set_md(mdbio, digest)) + { + CMSerr(CMS_F_CMS_DIGESTALGORITHM_INIT_BIO, + CMS_R_MD_BIO_INIT_ERROR); + goto err; + } + return mdbio; + err: + if (mdbio) + BIO_free(mdbio); + return NULL; + } + +/* Locate a message digest content from a BIO chain based on SignerInfo */ + +int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain, + X509_ALGOR *mdalg) + { + int nid; + ASN1_OBJECT *mdoid; + X509_ALGOR_get0(&mdoid, NULL, NULL, mdalg); + nid = OBJ_obj2nid(mdoid); + /* Look for digest type to match signature */ + for (;;) + { + EVP_MD_CTX *mtmp; + chain = BIO_find_type(chain, BIO_TYPE_MD); + if (chain == NULL) + { + CMSerr(CMS_F_CMS_DIGESTALGORITHM_FIND_CTX, + CMS_R_NO_MATCHING_DIGEST); + return 0; + } + BIO_get_md_ctx(chain, &mtmp); + if (EVP_MD_CTX_type(mtmp) == nid + /* Workaround for broken implementations that use signature + * algorithm OID instead of digest. + */ + || EVP_MD_pkey_type(EVP_MD_CTX_md(mtmp)) == nid) + return EVP_MD_CTX_copy_ex(mctx, mtmp); + chain = BIO_next(chain); + } + } + +static STACK_OF(CMS_CertificateChoices) **cms_get0_certificate_choices(CMS_ContentInfo *cms) + { + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_signed: + return &cms->d.signedData->certificates; + + case NID_pkcs7_enveloped: + return &cms->d.envelopedData->originatorInfo->certificates; + + default: + CMSerr(CMS_F_CMS_GET0_CERTIFICATE_CHOICES, + CMS_R_UNSUPPORTED_CONTENT_TYPE); + return NULL; + + } + } + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo *cms) + { + STACK_OF(CMS_CertificateChoices) **pcerts; + CMS_CertificateChoices *cch; + pcerts = cms_get0_certificate_choices(cms); + if (!pcerts) + return NULL; + if (!*pcerts) + *pcerts = sk_CMS_CertificateChoices_new_null(); + if (!*pcerts) + return NULL; + cch = M_ASN1_new_of(CMS_CertificateChoices); + if (!cch) + return NULL; + if (!sk_CMS_CertificateChoices_push(*pcerts, cch)) + { + M_ASN1_free_of(cch, CMS_CertificateChoices); + return NULL; + } + return cch; + } + +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert) + { + CMS_CertificateChoices *cch; + STACK_OF(CMS_CertificateChoices) **pcerts; + int i; + pcerts = cms_get0_certificate_choices(cms); + if (!pcerts) + return 0; + for (i = 0; i < sk_CMS_CertificateChoices_num(*pcerts); i++) + { + cch = sk_CMS_CertificateChoices_value(*pcerts, i); + if (cch->type == CMS_CERTCHOICE_CERT) + { + if (!X509_cmp(cch->d.certificate, cert)) + { + CMSerr(CMS_F_CMS_ADD0_CERT, + CMS_R_CERTIFICATE_ALREADY_PRESENT); + return 0; + } + } + } + cch = CMS_add0_CertificateChoices(cms); + if (!cch) + return 0; + cch->type = CMS_CERTCHOICE_CERT; + cch->d.certificate = cert; + return 1; + } + +int CMS_add1_cert(CMS_ContentInfo *cms, X509 *cert) + { + int r; + r = CMS_add0_cert(cms, cert); + if (r > 0) + CRYPTO_add(&cert->references, 1, CRYPTO_LOCK_X509); + return r; + } + +static STACK_OF(CMS_RevocationInfoChoice) **cms_get0_revocation_choices(CMS_ContentInfo *cms) + { + switch (OBJ_obj2nid(cms->contentType)) + { + + case NID_pkcs7_signed: + return &cms->d.signedData->crls; + + case NID_pkcs7_enveloped: + return &cms->d.envelopedData->originatorInfo->crls; + + default: + CMSerr(CMS_F_CMS_GET0_REVOCATION_CHOICES, + CMS_R_UNSUPPORTED_CONTENT_TYPE); + return NULL; + + } + } + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms) + { + STACK_OF(CMS_RevocationInfoChoice) **pcrls; + CMS_RevocationInfoChoice *rch; + pcrls = cms_get0_revocation_choices(cms); + if (!pcrls) + return NULL; + if (!*pcrls) + *pcrls = sk_CMS_RevocationInfoChoice_new_null(); + if (!*pcrls) + return NULL; + rch = M_ASN1_new_of(CMS_RevocationInfoChoice); + if (!rch) + return NULL; + if (!sk_CMS_RevocationInfoChoice_push(*pcrls, rch)) + { + M_ASN1_free_of(rch, CMS_RevocationInfoChoice); + return NULL; + } + return rch; + } + +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl) + { + CMS_RevocationInfoChoice *rch; + rch = CMS_add0_RevocationInfoChoice(cms); + if (!rch) + return 0; + rch->type = CMS_REVCHOICE_CRL; + rch->d.crl = crl; + return 1; + } + +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl) + { + int r; + r = CMS_add0_crl(cms, crl); + if (r > 0) + CRYPTO_add(&crl->references, 1, CRYPTO_LOCK_X509_CRL); + return r; + } + +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms) + { + STACK_OF(X509) *certs = NULL; + CMS_CertificateChoices *cch; + STACK_OF(CMS_CertificateChoices) **pcerts; + int i; + pcerts = cms_get0_certificate_choices(cms); + if (!pcerts) + return NULL; + for (i = 0; i < sk_CMS_CertificateChoices_num(*pcerts); i++) + { + cch = sk_CMS_CertificateChoices_value(*pcerts, i); + if (cch->type == 0) + { + if (!certs) + { + certs = sk_X509_new_null(); + if (!certs) + return NULL; + } + if (!sk_X509_push(certs, cch->d.certificate)) + { + sk_X509_pop_free(certs, X509_free); + return NULL; + } + CRYPTO_add(&cch->d.certificate->references, + 1, CRYPTO_LOCK_X509); + } + } + return certs; + + } + +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms) + { + STACK_OF(X509_CRL) *crls = NULL; + STACK_OF(CMS_RevocationInfoChoice) **pcrls; + CMS_RevocationInfoChoice *rch; + int i; + pcrls = cms_get0_revocation_choices(cms); + if (!pcrls) + return NULL; + for (i = 0; i < sk_CMS_RevocationInfoChoice_num(*pcrls); i++) + { + rch = sk_CMS_RevocationInfoChoice_value(*pcrls, i); + if (rch->type == 0) + { + if (!crls) + { + crls = sk_X509_CRL_new_null(); + if (!crls) + return NULL; + } + if (!sk_X509_CRL_push(crls, rch->d.crl)) + { + sk_X509_CRL_pop_free(crls, X509_CRL_free); + return NULL; + } + CRYPTO_add(&rch->d.crl->references, + 1, CRYPTO_LOCK_X509_CRL); + } + } + return crls; + } diff --git a/app/openssl/crypto/cms/cms_pwri.c b/app/openssl/crypto/cms/cms_pwri.c new file mode 100644 index 00000000..b79612a1 --- /dev/null +++ b/app/openssl/crypto/cms/cms_pwri.c @@ -0,0 +1,454 @@ +/* crypto/cms/cms_pwri.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2009 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" +#include "asn1_locl.h" + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, ossl_ssize_t passlen) + { + CMS_PasswordRecipientInfo *pwri; + if (ri->type != CMS_RECIPINFO_PASS) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD, CMS_R_NOT_PWRI); + return 0; + } + + pwri = ri->d.pwri; + pwri->pass = pass; + if (pass && passlen < 0) + passlen = strlen((char *)pass); + pwri->passlen = passlen; + return 1; + } + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph) + { + CMS_RecipientInfo *ri = NULL; + CMS_EnvelopedData *env; + CMS_PasswordRecipientInfo *pwri; + EVP_CIPHER_CTX ctx; + X509_ALGOR *encalg = NULL; + unsigned char iv[EVP_MAX_IV_LENGTH]; + int ivlen; + env = cms_get0_enveloped(cms); + if (!env) + goto err; + + if (wrap_nid <= 0) + wrap_nid = NID_id_alg_PWRI_KEK; + + if (pbe_nid <= 0) + pbe_nid = NID_id_pbkdf2; + + /* Get from enveloped data */ + if (kekciph == NULL) + kekciph = env->encryptedContentInfo->cipher; + + if (kekciph == NULL) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, CMS_R_NO_CIPHER); + return NULL; + } + if (wrap_nid != NID_id_alg_PWRI_KEK) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, + CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM); + return NULL; + } + + /* Setup algorithm identifier for cipher */ + encalg = X509_ALGOR_new(); + EVP_CIPHER_CTX_init(&ctx); + + if (EVP_EncryptInit_ex(&ctx, kekciph, NULL, NULL, NULL) <= 0) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, ERR_R_EVP_LIB); + goto err; + } + + ivlen = EVP_CIPHER_CTX_iv_length(&ctx); + + if (ivlen > 0) + { + if (RAND_pseudo_bytes(iv, ivlen) <= 0) + goto err; + if (EVP_EncryptInit_ex(&ctx, NULL, NULL, NULL, iv) <= 0) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, + ERR_R_EVP_LIB); + goto err; + } + encalg->parameter = ASN1_TYPE_new(); + if (!encalg->parameter) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, + ERR_R_MALLOC_FAILURE); + goto err; + } + if (EVP_CIPHER_param_to_asn1(&ctx, encalg->parameter) <= 0) + { + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, + CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); + goto err; + } + } + + + encalg->algorithm = OBJ_nid2obj(EVP_CIPHER_CTX_type(&ctx)); + + EVP_CIPHER_CTX_cleanup(&ctx); + + /* Initialize recipient info */ + ri = M_ASN1_new_of(CMS_RecipientInfo); + if (!ri) + goto merr; + + ri->d.pwri = M_ASN1_new_of(CMS_PasswordRecipientInfo); + if (!ri->d.pwri) + goto merr; + ri->type = CMS_RECIPINFO_PASS; + + pwri = ri->d.pwri; + /* Since this is overwritten, free up empty structure already there */ + X509_ALGOR_free(pwri->keyEncryptionAlgorithm); + pwri->keyEncryptionAlgorithm = X509_ALGOR_new(); + if (!pwri->keyEncryptionAlgorithm) + goto merr; + pwri->keyEncryptionAlgorithm->algorithm = OBJ_nid2obj(wrap_nid); + pwri->keyEncryptionAlgorithm->parameter = ASN1_TYPE_new(); + if (!pwri->keyEncryptionAlgorithm->parameter) + goto merr; + + if(!ASN1_item_pack(encalg, ASN1_ITEM_rptr(X509_ALGOR), + &pwri->keyEncryptionAlgorithm->parameter->value.sequence)) + goto merr; + pwri->keyEncryptionAlgorithm->parameter->type = V_ASN1_SEQUENCE; + + X509_ALGOR_free(encalg); + encalg = NULL; + + /* Setup PBE algorithm */ + + pwri->keyDerivationAlgorithm = PKCS5_pbkdf2_set(iter, NULL, 0, -1, -1); + + if (!pwri->keyDerivationAlgorithm) + goto err; + + CMS_RecipientInfo_set0_password(ri, pass, passlen); + pwri->version = 0; + + if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri)) + goto merr; + + return ri; + + merr: + CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, ERR_R_MALLOC_FAILURE); + err: + EVP_CIPHER_CTX_cleanup(&ctx); + if (ri) + M_ASN1_free_of(ri, CMS_RecipientInfo); + if (encalg) + X509_ALGOR_free(encalg); + return NULL; + + } + +/* This is an implementation of the key wrapping mechanism in RFC3211, + * at some point this should go into EVP. + */ + +static int kek_unwrap_key(unsigned char *out, size_t *outlen, + const unsigned char *in, size_t inlen, EVP_CIPHER_CTX *ctx) + { + size_t blocklen = EVP_CIPHER_CTX_block_size(ctx); + unsigned char *tmp; + int outl, rv = 0; + if (inlen < 2 * blocklen) + { + /* too small */ + return 0; + } + if (inlen % blocklen) + { + /* Invalid size */ + return 0; + } + tmp = OPENSSL_malloc(inlen); + /* setup IV by decrypting last two blocks */ + EVP_DecryptUpdate(ctx, tmp + inlen - 2 * blocklen, &outl, + in + inlen - 2 * blocklen, blocklen * 2); + /* Do a decrypt of last decrypted block to set IV to correct value + * output it to start of buffer so we don't corrupt decrypted block + * this works because buffer is at least two block lengths long. + */ + EVP_DecryptUpdate(ctx, tmp, &outl, + tmp + inlen - blocklen, blocklen); + /* Can now decrypt first n - 1 blocks */ + EVP_DecryptUpdate(ctx, tmp, &outl, in, inlen - blocklen); + + /* Reset IV to original value */ + EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, NULL); + /* Decrypt again */ + EVP_DecryptUpdate(ctx, tmp, &outl, tmp, inlen); + /* Check check bytes */ + if (((tmp[1] ^ tmp[4]) & (tmp[2] ^ tmp[5]) & (tmp[3] ^ tmp[6])) != 0xff) + { + /* Check byte failure */ + goto err; + } + if (inlen < (size_t)(tmp[0] - 4 )) + { + /* Invalid length value */ + goto err; + } + *outlen = (size_t)tmp[0]; + memcpy(out, tmp + 4, *outlen); + rv = 1; + err: + OPENSSL_cleanse(tmp, inlen); + OPENSSL_free(tmp); + return rv; + + } + +static int kek_wrap_key(unsigned char *out, size_t *outlen, + const unsigned char *in, size_t inlen, EVP_CIPHER_CTX *ctx) + { + size_t blocklen = EVP_CIPHER_CTX_block_size(ctx); + size_t olen; + int dummy; + /* First decide length of output buffer: need header and round up to + * multiple of block length. + */ + olen = (inlen + 4 + blocklen - 1)/blocklen; + olen *= blocklen; + if (olen < 2 * blocklen) + { + /* Key too small */ + return 0; + } + if (inlen > 0xFF) + { + /* Key too large */ + return 0; + } + if (out) + { + /* Set header */ + out[0] = (unsigned char)inlen; + out[1] = in[0] ^ 0xFF; + out[2] = in[1] ^ 0xFF; + out[3] = in[2] ^ 0xFF; + memcpy(out + 4, in, inlen); + /* Add random padding to end */ + if (olen > inlen + 4) + RAND_pseudo_bytes(out + 4 + inlen, olen - 4 - inlen); + /* Encrypt twice */ + EVP_EncryptUpdate(ctx, out, &dummy, out, olen); + EVP_EncryptUpdate(ctx, out, &dummy, out, olen); + } + + *outlen = olen; + + return 1; + } + +/* Encrypt/Decrypt content key in PWRI recipient info */ + +int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, + int en_de) + { + CMS_EncryptedContentInfo *ec; + CMS_PasswordRecipientInfo *pwri; + const unsigned char *p = NULL; + int plen; + int r = 0; + X509_ALGOR *algtmp, *kekalg = NULL; + EVP_CIPHER_CTX kekctx; + const EVP_CIPHER *kekcipher; + unsigned char *key = NULL; + size_t keylen; + + ec = cms->d.envelopedData->encryptedContentInfo; + + pwri = ri->d.pwri; + EVP_CIPHER_CTX_init(&kekctx); + + if (!pwri->pass) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, CMS_R_NO_PASSWORD); + return 0; + } + algtmp = pwri->keyEncryptionAlgorithm; + + if (!algtmp || OBJ_obj2nid(algtmp->algorithm) != NID_id_alg_PWRI_KEK) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM); + return 0; + } + + if (algtmp->parameter->type == V_ASN1_SEQUENCE) + { + p = algtmp->parameter->value.sequence->data; + plen = algtmp->parameter->value.sequence->length; + kekalg = d2i_X509_ALGOR(NULL, &p, plen); + } + if (kekalg == NULL) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER); + return 0; + } + + kekcipher = EVP_get_cipherbyobj(kekalg->algorithm); + + if(!kekcipher) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + CMS_R_UNKNOWN_CIPHER); + goto err; + } + + /* Fixup cipher based on AlgorithmIdentifier to set IV etc */ + if (!EVP_CipherInit_ex(&kekctx, kekcipher, NULL, NULL, NULL, en_de)) + goto err; + EVP_CIPHER_CTX_set_padding(&kekctx, 0); + if(EVP_CIPHER_asn1_to_param(&kekctx, kekalg->parameter) < 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); + goto err; + } + + algtmp = pwri->keyDerivationAlgorithm; + + /* Finish password based key derivation to setup key in "ctx" */ + + if (EVP_PBE_CipherInit(algtmp->algorithm, + (char *)pwri->pass, pwri->passlen, + algtmp->parameter, &kekctx, en_de) < 0) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, ERR_R_EVP_LIB); + goto err; + } + + /* Finally wrap/unwrap the key */ + + if (en_de) + { + + if (!kek_wrap_key(NULL, &keylen, ec->key, ec->keylen, &kekctx)) + goto err; + + key = OPENSSL_malloc(keylen); + + if (!key) + goto err; + + if (!kek_wrap_key(key, &keylen, ec->key, ec->keylen, &kekctx)) + goto err; + pwri->encryptedKey->data = key; + pwri->encryptedKey->length = keylen; + } + else + { + key = OPENSSL_malloc(pwri->encryptedKey->length); + + if (!key) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + ERR_R_MALLOC_FAILURE); + goto err; + } + if (!kek_unwrap_key(key, &keylen, + pwri->encryptedKey->data, + pwri->encryptedKey->length, &kekctx)) + { + CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, + CMS_R_UNWRAP_FAILURE); + goto err; + } + + ec->key = key; + ec->keylen = keylen; + + } + + r = 1; + + err: + + EVP_CIPHER_CTX_cleanup(&kekctx); + + if (!r && key) + OPENSSL_free(key); + X509_ALGOR_free(kekalg); + + return r; + + } diff --git a/app/openssl/crypto/cms/cms_sd.c b/app/openssl/crypto/cms/cms_sd.c new file mode 100644 index 00000000..77fbd135 --- /dev/null +++ b/app/openssl/crypto/cms/cms_sd.c @@ -0,0 +1,985 @@ +/* crypto/cms/cms_sd.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include "cms_lcl.h" +#include "asn1_locl.h" + +/* CMS SignedData Utilities */ + +DECLARE_ASN1_ITEM(CMS_SignedData) + +static CMS_SignedData *cms_get0_signed(CMS_ContentInfo *cms) + { + if (OBJ_obj2nid(cms->contentType) != NID_pkcs7_signed) + { + CMSerr(CMS_F_CMS_GET0_SIGNED, CMS_R_CONTENT_TYPE_NOT_SIGNED_DATA); + return NULL; + } + return cms->d.signedData; + } + +static CMS_SignedData *cms_signed_data_init(CMS_ContentInfo *cms) + { + if (cms->d.other == NULL) + { + cms->d.signedData = M_ASN1_new_of(CMS_SignedData); + if (!cms->d.signedData) + { + CMSerr(CMS_F_CMS_SIGNED_DATA_INIT, ERR_R_MALLOC_FAILURE); + return NULL; + } + cms->d.signedData->version = 1; + cms->d.signedData->encapContentInfo->eContentType = + OBJ_nid2obj(NID_pkcs7_data); + cms->d.signedData->encapContentInfo->partial = 1; + ASN1_OBJECT_free(cms->contentType); + cms->contentType = OBJ_nid2obj(NID_pkcs7_signed); + return cms->d.signedData; + } + return cms_get0_signed(cms); + } + +/* Just initialize SignedData e.g. for certs only structure */ + +int CMS_SignedData_init(CMS_ContentInfo *cms) + { + if (cms_signed_data_init(cms)) + return 1; + else + return 0; + } + +/* Check structures and fixup version numbers (if necessary) */ + +static void cms_sd_set_version(CMS_SignedData *sd) + { + int i; + CMS_CertificateChoices *cch; + CMS_RevocationInfoChoice *rch; + CMS_SignerInfo *si; + + for (i = 0; i < sk_CMS_CertificateChoices_num(sd->certificates); i++) + { + cch = sk_CMS_CertificateChoices_value(sd->certificates, i); + if (cch->type == CMS_CERTCHOICE_OTHER) + { + if (sd->version < 5) + sd->version = 5; + } + else if (cch->type == CMS_CERTCHOICE_V2ACERT) + { + if (sd->version < 4) + sd->version = 4; + } + else if (cch->type == CMS_CERTCHOICE_V1ACERT) + { + if (sd->version < 3) + sd->version = 3; + } + } + + for (i = 0; i < sk_CMS_RevocationInfoChoice_num(sd->crls); i++) + { + rch = sk_CMS_RevocationInfoChoice_value(sd->crls, i); + if (rch->type == CMS_REVCHOICE_OTHER) + { + if (sd->version < 5) + sd->version = 5; + } + } + + if ((OBJ_obj2nid(sd->encapContentInfo->eContentType) != NID_pkcs7_data) + && (sd->version < 3)) + sd->version = 3; + + for (i = 0; i < sk_CMS_SignerInfo_num(sd->signerInfos); i++) + { + si = sk_CMS_SignerInfo_value(sd->signerInfos, i); + if (si->sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) + { + if (si->version < 3) + si->version = 3; + if (sd->version < 3) + sd->version = 3; + } + else + sd->version = 1; + } + + if (sd->version < 1) + sd->version = 1; + + } + +/* Copy an existing messageDigest value */ + +static int cms_copy_messageDigest(CMS_ContentInfo *cms, CMS_SignerInfo *si) + { + STACK_OF(CMS_SignerInfo) *sinfos; + CMS_SignerInfo *sitmp; + int i; + sinfos = CMS_get0_SignerInfos(cms); + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + ASN1_OCTET_STRING *messageDigest; + sitmp = sk_CMS_SignerInfo_value(sinfos, i); + if (sitmp == si) + continue; + if (CMS_signed_get_attr_count(sitmp) < 0) + continue; + if (OBJ_cmp(si->digestAlgorithm->algorithm, + sitmp->digestAlgorithm->algorithm)) + continue; + messageDigest = CMS_signed_get0_data_by_OBJ(sitmp, + OBJ_nid2obj(NID_pkcs9_messageDigest), + -3, V_ASN1_OCTET_STRING); + if (!messageDigest) + { + CMSerr(CMS_F_CMS_COPY_MESSAGEDIGEST, + CMS_R_ERROR_READING_MESSAGEDIGEST_ATTRIBUTE); + return 0; + } + + if (CMS_signed_add1_attr_by_NID(si, NID_pkcs9_messageDigest, + V_ASN1_OCTET_STRING, + messageDigest, -1)) + return 1; + else + return 0; + } + CMSerr(CMS_F_CMS_COPY_MESSAGEDIGEST, CMS_R_NO_MATCHING_DIGEST); + return 0; + } + +int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type) + { + switch(type) + { + case CMS_SIGNERINFO_ISSUER_SERIAL: + sid->d.issuerAndSerialNumber = + M_ASN1_new_of(CMS_IssuerAndSerialNumber); + if (!sid->d.issuerAndSerialNumber) + goto merr; + if (!X509_NAME_set(&sid->d.issuerAndSerialNumber->issuer, + X509_get_issuer_name(cert))) + goto merr; + if (!ASN1_STRING_copy( + sid->d.issuerAndSerialNumber->serialNumber, + X509_get_serialNumber(cert))) + goto merr; + break; + + case CMS_SIGNERINFO_KEYIDENTIFIER: + if (!cert->skid) + { + CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, + CMS_R_CERTIFICATE_HAS_NO_KEYID); + return 0; + } + sid->d.subjectKeyIdentifier = ASN1_STRING_dup(cert->skid); + if (!sid->d.subjectKeyIdentifier) + goto merr; + break; + + default: + CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, CMS_R_UNKNOWN_ID); + return 0; + } + + sid->type = type; + + return 1; + + merr: + CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, ERR_R_MALLOC_FAILURE); + return 0; + + } + +int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno) + { + if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) + { + if (issuer) + *issuer = sid->d.issuerAndSerialNumber->issuer; + if (sno) + *sno = sid->d.issuerAndSerialNumber->serialNumber; + } + else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) + { + if (keyid) + *keyid = sid->d.subjectKeyIdentifier; + } + else + return 0; + return 1; + } + +int cms_SignerIdentifier_cert_cmp(CMS_SignerIdentifier *sid, X509 *cert) + { + int ret; + if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) + { + ret = X509_NAME_cmp(sid->d.issuerAndSerialNumber->issuer, + X509_get_issuer_name(cert)); + if (ret) + return ret; + return ASN1_INTEGER_cmp(sid->d.issuerAndSerialNumber->serialNumber, + X509_get_serialNumber(cert)); + } + else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) + { + X509_check_purpose(cert, -1, -1); + if (!cert->skid) + return -1; + return ASN1_OCTET_STRING_cmp(sid->d.subjectKeyIdentifier, + cert->skid); + } + else + return -1; + } + +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags) + { + CMS_SignedData *sd; + CMS_SignerInfo *si = NULL; + X509_ALGOR *alg; + int i, type; + if(!X509_check_private_key(signer, pk)) + { + CMSerr(CMS_F_CMS_ADD1_SIGNER, + CMS_R_PRIVATE_KEY_DOES_NOT_MATCH_CERTIFICATE); + return NULL; + } + sd = cms_signed_data_init(cms); + if (!sd) + goto err; + si = M_ASN1_new_of(CMS_SignerInfo); + if (!si) + goto merr; + X509_check_purpose(signer, -1, -1); + + CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY); + CRYPTO_add(&signer->references, 1, CRYPTO_LOCK_X509); + + si->pkey = pk; + si->signer = signer; + + if (flags & CMS_USE_KEYID) + { + si->version = 3; + if (sd->version < 3) + sd->version = 3; + type = CMS_SIGNERINFO_KEYIDENTIFIER; + } + else + { + type = CMS_SIGNERINFO_ISSUER_SERIAL; + si->version = 1; + } + + if (!cms_set1_SignerIdentifier(si->sid, signer, type)) + goto err; + + if (md == NULL) + { + int def_nid; + if (EVP_PKEY_get_default_digest_nid(pk, &def_nid) <= 0) + goto err; + md = EVP_get_digestbynid(def_nid); + if (md == NULL) + { + CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_NO_DEFAULT_DIGEST); + goto err; + } + } + + if (!md) + { + CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_NO_DIGEST_SET); + goto err; + } + + cms_DigestAlgorithm_set(si->digestAlgorithm, md); + + /* See if digest is present in digestAlgorithms */ + for (i = 0; i < sk_X509_ALGOR_num(sd->digestAlgorithms); i++) + { + ASN1_OBJECT *aoid; + alg = sk_X509_ALGOR_value(sd->digestAlgorithms, i); + X509_ALGOR_get0(&aoid, NULL, NULL, alg); + if (OBJ_obj2nid(aoid) == EVP_MD_type(md)) + break; + } + + if (i == sk_X509_ALGOR_num(sd->digestAlgorithms)) + { + alg = X509_ALGOR_new(); + if (!alg) + goto merr; + cms_DigestAlgorithm_set(alg, md); + if (!sk_X509_ALGOR_push(sd->digestAlgorithms, alg)) + { + X509_ALGOR_free(alg); + goto merr; + } + } + + if (pk->ameth && pk->ameth->pkey_ctrl) + { + i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_SIGN, + 0, si); + if (i == -2) + { + CMSerr(CMS_F_CMS_ADD1_SIGNER, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + goto err; + } + if (i <= 0) + { + CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_CTRL_FAILURE); + goto err; + } + } + + if (!(flags & CMS_NOATTR)) + { + /* Initialialize signed attributes strutucture so other + * attributes such as signing time etc are added later + * even if we add none here. + */ + if (!si->signedAttrs) + { + si->signedAttrs = sk_X509_ATTRIBUTE_new_null(); + if (!si->signedAttrs) + goto merr; + } + + if (!(flags & CMS_NOSMIMECAP)) + { + STACK_OF(X509_ALGOR) *smcap = NULL; + i = CMS_add_standard_smimecap(&smcap); + if (i) + i = CMS_add_smimecap(si, smcap); + sk_X509_ALGOR_pop_free(smcap, X509_ALGOR_free); + if (!i) + goto merr; + } + if (flags & CMS_REUSE_DIGEST) + { + if (!cms_copy_messageDigest(cms, si)) + goto err; + if (!(flags & CMS_PARTIAL) && + !CMS_SignerInfo_sign(si)) + goto err; + } + } + + if (!(flags & CMS_NOCERTS)) + { + /* NB ignore -1 return for duplicate cert */ + if (!CMS_add1_cert(cms, signer)) + goto merr; + } + + if (!sd->signerInfos) + sd->signerInfos = sk_CMS_SignerInfo_new_null(); + if (!sd->signerInfos || + !sk_CMS_SignerInfo_push(sd->signerInfos, si)) + goto merr; + + return si; + + merr: + CMSerr(CMS_F_CMS_ADD1_SIGNER, ERR_R_MALLOC_FAILURE); + err: + if (si) + M_ASN1_free_of(si, CMS_SignerInfo); + return NULL; + + } + +static int cms_add1_signingTime(CMS_SignerInfo *si, ASN1_TIME *t) + { + ASN1_TIME *tt; + int r = 0; + if (t) + tt = t; + else + tt = X509_gmtime_adj(NULL, 0); + + if (!tt) + goto merr; + + if (CMS_signed_add1_attr_by_NID(si, NID_pkcs9_signingTime, + tt->type, tt, -1) <= 0) + goto merr; + + r = 1; + + merr: + + if (!t) + ASN1_TIME_free(tt); + + if (!r) + CMSerr(CMS_F_CMS_ADD1_SIGNINGTIME, ERR_R_MALLOC_FAILURE); + + return r; + + } + +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms) + { + CMS_SignedData *sd; + sd = cms_get0_signed(cms); + if (!sd) + return NULL; + return sd->signerInfos; + } + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms) + { + STACK_OF(X509) *signers = NULL; + STACK_OF(CMS_SignerInfo) *sinfos; + CMS_SignerInfo *si; + int i; + sinfos = CMS_get0_SignerInfos(cms); + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + if (si->signer) + { + if (!signers) + { + signers = sk_X509_new_null(); + if (!signers) + return NULL; + } + if (!sk_X509_push(signers, si->signer)) + { + sk_X509_free(signers); + return NULL; + } + } + } + return signers; + } + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer) + { + if (signer) + { + CRYPTO_add(&signer->references, 1, CRYPTO_LOCK_X509); + if (si->pkey) + EVP_PKEY_free(si->pkey); + si->pkey = X509_get_pubkey(signer); + } + if (si->signer) + X509_free(si->signer); + si->signer = signer; + } + +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno) + { + return cms_SignerIdentifier_get0_signer_id(si->sid, keyid, issuer, sno); + } + +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert) + { + return cms_SignerIdentifier_cert_cmp(si->sid, cert); + } + +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *scerts, + unsigned int flags) + { + CMS_SignedData *sd; + CMS_SignerInfo *si; + CMS_CertificateChoices *cch; + STACK_OF(CMS_CertificateChoices) *certs; + X509 *x; + int i, j; + int ret = 0; + sd = cms_get0_signed(cms); + if (!sd) + return -1; + certs = sd->certificates; + for (i = 0; i < sk_CMS_SignerInfo_num(sd->signerInfos); i++) + { + si = sk_CMS_SignerInfo_value(sd->signerInfos, i); + if (si->signer) + continue; + + for (j = 0; j < sk_X509_num(scerts); j++) + { + x = sk_X509_value(scerts, j); + if (CMS_SignerInfo_cert_cmp(si, x) == 0) + { + CMS_SignerInfo_set1_signer_cert(si, x); + ret++; + break; + } + } + + if (si->signer || (flags & CMS_NOINTERN)) + continue; + + for (j = 0; j < sk_CMS_CertificateChoices_num(certs); j++) + { + cch = sk_CMS_CertificateChoices_value(certs, j); + if (cch->type != 0) + continue; + x = cch->d.certificate; + if (CMS_SignerInfo_cert_cmp(si, x) == 0) + { + CMS_SignerInfo_set1_signer_cert(si, x); + ret++; + break; + } + } + } + return ret; + } + +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, X509 **signer, + X509_ALGOR **pdig, X509_ALGOR **psig) + { + if (pk) + *pk = si->pkey; + if (signer) + *signer = si->signer; + if (pdig) + *pdig = si->digestAlgorithm; + if (psig) + *psig = si->signatureAlgorithm; + } + +static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, + CMS_SignerInfo *si, BIO *chain) + { + EVP_MD_CTX mctx; + int r = 0; + EVP_MD_CTX_init(&mctx); + + + if (!si->pkey) + { + CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, CMS_R_NO_PRIVATE_KEY); + return 0; + } + + if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) + goto err; + + /* If any signed attributes calculate and add messageDigest attribute */ + + if (CMS_signed_get_attr_count(si) >= 0) + { + ASN1_OBJECT *ctype = + cms->d.signedData->encapContentInfo->eContentType; + unsigned char md[EVP_MAX_MD_SIZE]; + unsigned int mdlen; + if (!EVP_DigestFinal_ex(&mctx, md, &mdlen)) + goto err; + if (!CMS_signed_add1_attr_by_NID(si, NID_pkcs9_messageDigest, + V_ASN1_OCTET_STRING, + md, mdlen)) + goto err; + /* Copy content type across */ + if (CMS_signed_add1_attr_by_NID(si, NID_pkcs9_contentType, + V_ASN1_OBJECT, ctype, -1) <= 0) + goto err; + if (!CMS_SignerInfo_sign(si)) + goto err; + } + else + { + unsigned char *sig; + unsigned int siglen; + sig = OPENSSL_malloc(EVP_PKEY_size(si->pkey)); + if (!sig) + { + CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, + ERR_R_MALLOC_FAILURE); + goto err; + } + if (!EVP_SignFinal(&mctx, sig, &siglen, si->pkey)) + { + CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, + CMS_R_SIGNFINAL_ERROR); + OPENSSL_free(sig); + goto err; + } + ASN1_STRING_set0(si->signature, sig, siglen); + } + + r = 1; + + err: + EVP_MD_CTX_cleanup(&mctx); + return r; + + } + +int cms_SignedData_final(CMS_ContentInfo *cms, BIO *chain) + { + STACK_OF(CMS_SignerInfo) *sinfos; + CMS_SignerInfo *si; + int i; + sinfos = CMS_get0_SignerInfos(cms); + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + if (!cms_SignerInfo_content_sign(cms, si, chain)) + return 0; + } + cms->d.signedData->encapContentInfo->partial = 0; + return 1; + } + +int CMS_SignerInfo_sign(CMS_SignerInfo *si) + { + EVP_MD_CTX mctx; + EVP_PKEY_CTX *pctx; + unsigned char *abuf = NULL; + int alen; + size_t siglen; + const EVP_MD *md = NULL; + + md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm); + if (md == NULL) + return 0; + + EVP_MD_CTX_init(&mctx); + + if (CMS_signed_get_attr_by_NID(si, NID_pkcs9_signingTime, -1) < 0) + { + if (!cms_add1_signingTime(si, NULL)) + goto err; + } + + if (EVP_DigestSignInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) + goto err; + + if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, + EVP_PKEY_CTRL_CMS_SIGN, 0, si) <= 0) + { + CMSerr(CMS_F_CMS_SIGNERINFO_SIGN, CMS_R_CTRL_ERROR); + goto err; + } + + alen = ASN1_item_i2d((ASN1_VALUE *)si->signedAttrs,&abuf, + ASN1_ITEM_rptr(CMS_Attributes_Sign)); + if(!abuf) + goto err; + if (EVP_DigestSignUpdate(&mctx, abuf, alen) <= 0) + goto err; + if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0) + goto err; + OPENSSL_free(abuf); + abuf = OPENSSL_malloc(siglen); + if(!abuf) + goto err; + if (EVP_DigestSignFinal(&mctx, abuf, &siglen) <= 0) + goto err; + + if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, + EVP_PKEY_CTRL_CMS_SIGN, 1, si) <= 0) + { + CMSerr(CMS_F_CMS_SIGNERINFO_SIGN, CMS_R_CTRL_ERROR); + goto err; + } + + EVP_MD_CTX_cleanup(&mctx); + + ASN1_STRING_set0(si->signature, abuf, siglen); + + return 1; + + err: + if (abuf) + OPENSSL_free(abuf); + EVP_MD_CTX_cleanup(&mctx); + return 0; + + } + +int CMS_SignerInfo_verify(CMS_SignerInfo *si) + { + EVP_MD_CTX mctx; + EVP_PKEY_CTX *pctx; + unsigned char *abuf = NULL; + int alen, r = -1; + const EVP_MD *md = NULL; + + if (!si->pkey) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY, CMS_R_NO_PUBLIC_KEY); + return -1; + } + + md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm); + if (md == NULL) + return -1; + EVP_MD_CTX_init(&mctx); + if (EVP_DigestVerifyInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) + goto err; + + alen = ASN1_item_i2d((ASN1_VALUE *)si->signedAttrs,&abuf, + ASN1_ITEM_rptr(CMS_Attributes_Verify)); + if(!abuf) + goto err; + r = EVP_DigestVerifyUpdate(&mctx, abuf, alen); + OPENSSL_free(abuf); + if (r <= 0) + { + r = -1; + goto err; + } + r = EVP_DigestVerifyFinal(&mctx, + si->signature->data, si->signature->length); + if (r <= 0) + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY, CMS_R_VERIFICATION_FAILURE); + err: + EVP_MD_CTX_cleanup(&mctx); + return r; + } + +/* Create a chain of digest BIOs from a CMS ContentInfo */ + +BIO *cms_SignedData_init_bio(CMS_ContentInfo *cms) + { + int i; + CMS_SignedData *sd; + BIO *chain = NULL; + sd = cms_get0_signed(cms); + if (!sd) + return NULL; + if (cms->d.signedData->encapContentInfo->partial) + cms_sd_set_version(sd); + for (i = 0; i < sk_X509_ALGOR_num(sd->digestAlgorithms); i++) + { + X509_ALGOR *digestAlgorithm; + BIO *mdbio; + digestAlgorithm = sk_X509_ALGOR_value(sd->digestAlgorithms, i); + mdbio = cms_DigestAlgorithm_init_bio(digestAlgorithm); + if (!mdbio) + goto err; + if (chain) + BIO_push(chain, mdbio); + else + chain = mdbio; + } + return chain; + err: + if (chain) + BIO_free_all(chain); + return NULL; + } + +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) + { + ASN1_OCTET_STRING *os = NULL; + EVP_MD_CTX mctx; + int r = -1; + EVP_MD_CTX_init(&mctx); + /* If we have any signed attributes look for messageDigest value */ + if (CMS_signed_get_attr_count(si) >= 0) + { + os = CMS_signed_get0_data_by_OBJ(si, + OBJ_nid2obj(NID_pkcs9_messageDigest), + -3, V_ASN1_OCTET_STRING); + if (!os) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_ERROR_READING_MESSAGEDIGEST_ATTRIBUTE); + goto err; + } + } + + if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) + goto err; + + /* If messageDigest found compare it */ + + if (os) + { + unsigned char mval[EVP_MAX_MD_SIZE]; + unsigned int mlen; + if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_UNABLE_TO_FINALIZE_CONTEXT); + goto err; + } + if (mlen != (unsigned int)os->length) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH); + goto err; + } + + if (memcmp(mval, os->data, mlen)) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_VERIFICATION_FAILURE); + r = 0; + } + else + r = 1; + } + else + { + r = EVP_VerifyFinal(&mctx, si->signature->data, + si->signature->length, si->pkey); + if (r <= 0) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_VERIFICATION_FAILURE); + r = 0; + } + } + + err: + EVP_MD_CTX_cleanup(&mctx); + return r; + + } + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs) + { + unsigned char *smder = NULL; + int smderlen, r; + smderlen = i2d_X509_ALGORS(algs, &smder); + if (smderlen <= 0) + return 0; + r = CMS_signed_add1_attr_by_NID(si, NID_SMIMECapabilities, + V_ASN1_SEQUENCE, smder, smderlen); + OPENSSL_free(smder); + return r; + } + +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize) + { + X509_ALGOR *alg; + ASN1_INTEGER *key = NULL; + if (keysize > 0) + { + key = ASN1_INTEGER_new(); + if (!key || !ASN1_INTEGER_set(key, keysize)) + return 0; + } + alg = X509_ALGOR_new(); + if (!alg) + { + if (key) + ASN1_INTEGER_free(key); + return 0; + } + + X509_ALGOR_set0(alg, OBJ_nid2obj(algnid), + key ? V_ASN1_INTEGER : V_ASN1_UNDEF, key); + if (!*algs) + *algs = sk_X509_ALGOR_new_null(); + if (!*algs || !sk_X509_ALGOR_push(*algs, alg)) + { + X509_ALGOR_free(alg); + return 0; + } + return 1; + } + +/* Check to see if a cipher exists and if so add S/MIME capabilities */ + +static int cms_add_cipher_smcap(STACK_OF(X509_ALGOR) **sk, int nid, int arg) + { + if (EVP_get_cipherbynid(nid)) + return CMS_add_simple_smimecap(sk, nid, arg); + return 1; + } + +static int cms_add_digest_smcap(STACK_OF(X509_ALGOR) **sk, int nid, int arg) + { + if (EVP_get_digestbynid(nid)) + return CMS_add_simple_smimecap(sk, nid, arg); + return 1; + } + +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap) + { + if (!cms_add_cipher_smcap(smcap, NID_aes_256_cbc, -1) + || !cms_add_digest_smcap(smcap, NID_id_GostR3411_94, -1) + || !cms_add_cipher_smcap(smcap, NID_id_Gost28147_89, -1) + || !cms_add_cipher_smcap(smcap, NID_aes_192_cbc, -1) + || !cms_add_cipher_smcap(smcap, NID_aes_128_cbc, -1) + || !cms_add_cipher_smcap(smcap, NID_des_ede3_cbc, -1) + || !cms_add_cipher_smcap(smcap, NID_rc2_cbc, 128) + || !cms_add_cipher_smcap(smcap, NID_rc2_cbc, 64) + || !cms_add_cipher_smcap(smcap, NID_des_cbc, -1) + || !cms_add_cipher_smcap(smcap, NID_rc2_cbc, 40)) + return 0; + return 1; + } diff --git a/app/openssl/crypto/cms/cms_smime.c b/app/openssl/crypto/cms/cms_smime.c new file mode 100644 index 00000000..8c56e3a8 --- /dev/null +++ b/app/openssl/crypto/cms/cms_smime.c @@ -0,0 +1,850 @@ +/* crypto/cms/cms_smime.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include "cms_lcl.h" + +static int cms_copy_content(BIO *out, BIO *in, unsigned int flags) + { + unsigned char buf[4096]; + int r = 0, i; + BIO *tmpout = NULL; + + if (out == NULL) + tmpout = BIO_new(BIO_s_null()); + else if (flags & CMS_TEXT) + { + tmpout = BIO_new(BIO_s_mem()); + BIO_set_mem_eof_return(tmpout, 0); + } + else + tmpout = out; + + if(!tmpout) + { + CMSerr(CMS_F_CMS_COPY_CONTENT,ERR_R_MALLOC_FAILURE); + goto err; + } + + /* Read all content through chain to process digest, decrypt etc */ + for (;;) + { + i=BIO_read(in,buf,sizeof(buf)); + if (i <= 0) + { + if (BIO_method_type(in) == BIO_TYPE_CIPHER) + { + if (!BIO_get_cipher_status(in)) + goto err; + } + if (i < 0) + goto err; + break; + } + + if (tmpout && (BIO_write(tmpout, buf, i) != i)) + goto err; + } + + if(flags & CMS_TEXT) + { + if(!SMIME_text(tmpout, out)) + { + CMSerr(CMS_F_CMS_COPY_CONTENT,CMS_R_SMIME_TEXT_ERROR); + goto err; + } + } + + r = 1; + + err: + if (tmpout && (tmpout != out)) + BIO_free(tmpout); + return r; + + } + +static int check_content(CMS_ContentInfo *cms) + { + ASN1_OCTET_STRING **pos = CMS_get0_content(cms); + if (!pos || !*pos) + { + CMSerr(CMS_F_CHECK_CONTENT, CMS_R_NO_CONTENT); + return 0; + } + return 1; + } + +static void do_free_upto(BIO *f, BIO *upto) + { + if (upto) + { + BIO *tbio; + do + { + tbio = BIO_pop(f); + BIO_free(f); + f = tbio; + } + while (f != upto); + } + else + BIO_free_all(f); + } + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags) + { + BIO *cont; + int r; + if (OBJ_obj2nid(CMS_get0_type(cms)) != NID_pkcs7_data) + { + CMSerr(CMS_F_CMS_DATA, CMS_R_TYPE_NOT_DATA); + return 0; + } + cont = CMS_dataInit(cms, NULL); + if (!cont) + return 0; + r = cms_copy_content(out, cont, flags); + BIO_free_all(cont); + return r; + } + +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags) + { + CMS_ContentInfo *cms; + cms = cms_Data_create(); + if (!cms) + return NULL; + + if ((flags & CMS_STREAM) || CMS_final(cms, in, NULL, flags)) + return cms; + + CMS_ContentInfo_free(cms); + + return NULL; + } + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags) + { + BIO *cont; + int r; + if (OBJ_obj2nid(CMS_get0_type(cms)) != NID_pkcs7_digest) + { + CMSerr(CMS_F_CMS_DIGEST_VERIFY, CMS_R_TYPE_NOT_DIGESTED_DATA); + return 0; + } + + if (!dcont && !check_content(cms)) + return 0; + + cont = CMS_dataInit(cms, dcont); + if (!cont) + return 0; + r = cms_copy_content(out, cont, flags); + if (r) + r = cms_DigestedData_do_final(cms, cont, 1); + do_free_upto(cont, dcont); + return r; + } + +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags) + { + CMS_ContentInfo *cms; + if (!md) + md = EVP_sha1(); + cms = cms_DigestedData_create(md); + if (!cms) + return NULL; + + if(!(flags & CMS_DETACHED)) + CMS_set_detached(cms, 0); + + if ((flags & CMS_STREAM) || CMS_final(cms, in, NULL, flags)) + return cms; + + CMS_ContentInfo_free(cms); + return NULL; + } + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags) + { + BIO *cont; + int r; + if (OBJ_obj2nid(CMS_get0_type(cms)) != NID_pkcs7_encrypted) + { + CMSerr(CMS_F_CMS_ENCRYPTEDDATA_DECRYPT, + CMS_R_TYPE_NOT_ENCRYPTED_DATA); + return 0; + } + + if (!dcont && !check_content(cms)) + return 0; + + if (CMS_EncryptedData_set1_key(cms, NULL, key, keylen) <= 0) + return 0; + cont = CMS_dataInit(cms, dcont); + if (!cont) + return 0; + r = cms_copy_content(out, cont, flags); + do_free_upto(cont, dcont); + return r; + } + +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, size_t keylen, + unsigned int flags) + { + CMS_ContentInfo *cms; + if (!cipher) + { + CMSerr(CMS_F_CMS_ENCRYPTEDDATA_ENCRYPT, CMS_R_NO_CIPHER); + return NULL; + } + cms = CMS_ContentInfo_new(); + if (!cms) + return NULL; + if (!CMS_EncryptedData_set1_key(cms, cipher, key, keylen)) + return NULL; + + if(!(flags & CMS_DETACHED)) + CMS_set_detached(cms, 0); + + if ((flags & (CMS_STREAM|CMS_PARTIAL)) + || CMS_final(cms, in, NULL, flags)) + return cms; + + CMS_ContentInfo_free(cms); + return NULL; + } + +static int cms_signerinfo_verify_cert(CMS_SignerInfo *si, + X509_STORE *store, + STACK_OF(X509) *certs, + STACK_OF(X509_CRL) *crls, + unsigned int flags) + { + X509_STORE_CTX ctx; + X509 *signer; + int i, j, r = 0; + CMS_SignerInfo_get0_algs(si, NULL, &signer, NULL, NULL); + if (!X509_STORE_CTX_init(&ctx, store, signer, certs)) + { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CERT, + CMS_R_STORE_INIT_ERROR); + goto err; + } + X509_STORE_CTX_set_default(&ctx, "smime_sign"); + if (crls) + X509_STORE_CTX_set0_crls(&ctx, crls); + + i = X509_verify_cert(&ctx); + if (i <= 0) + { + j = X509_STORE_CTX_get_error(&ctx); + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CERT, + CMS_R_CERTIFICATE_VERIFY_ERROR); + ERR_add_error_data(2, "Verify error:", + X509_verify_cert_error_string(j)); + goto err; + } + r = 1; + err: + X509_STORE_CTX_cleanup(&ctx); + return r; + + } + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags) + { + CMS_SignerInfo *si; + STACK_OF(CMS_SignerInfo) *sinfos; + STACK_OF(X509) *cms_certs = NULL; + STACK_OF(X509_CRL) *crls = NULL; + X509 *signer; + int i, scount = 0, ret = 0; + BIO *cmsbio = NULL, *tmpin = NULL; + + if (!dcont && !check_content(cms)) + return 0; + + /* Attempt to find all signer certificates */ + + sinfos = CMS_get0_SignerInfos(cms); + + if (sk_CMS_SignerInfo_num(sinfos) <= 0) + { + CMSerr(CMS_F_CMS_VERIFY, CMS_R_NO_SIGNERS); + goto err; + } + + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + CMS_SignerInfo_get0_algs(si, NULL, &signer, NULL, NULL); + if (signer) + scount++; + } + + if (scount != sk_CMS_SignerInfo_num(sinfos)) + scount += CMS_set1_signers_certs(cms, certs, flags); + + if (scount != sk_CMS_SignerInfo_num(sinfos)) + { + CMSerr(CMS_F_CMS_VERIFY, CMS_R_SIGNER_CERTIFICATE_NOT_FOUND); + goto err; + } + + /* Attempt to verify all signers certs */ + + if (!(flags & CMS_NO_SIGNER_CERT_VERIFY)) + { + cms_certs = CMS_get1_certs(cms); + if (!(flags & CMS_NOCRL)) + crls = CMS_get1_crls(cms); + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + if (!cms_signerinfo_verify_cert(si, store, + cms_certs, crls, flags)) + goto err; + } + } + + /* Attempt to verify all SignerInfo signed attribute signatures */ + + if (!(flags & CMS_NO_ATTR_VERIFY)) + { + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + if (CMS_signed_get_attr_count(si) < 0) + continue; + if (CMS_SignerInfo_verify(si) <= 0) + goto err; + } + } + + /* Performance optimization: if the content is a memory BIO then + * store its contents in a temporary read only memory BIO. This + * avoids potentially large numbers of slow copies of data which will + * occur when reading from a read write memory BIO when signatures + * are calculated. + */ + + if (dcont && (BIO_method_type(dcont) == BIO_TYPE_MEM)) + { + char *ptr; + long len; + len = BIO_get_mem_data(dcont, &ptr); + tmpin = BIO_new_mem_buf(ptr, len); + if (tmpin == NULL) + { + CMSerr(CMS_F_CMS_VERIFY,ERR_R_MALLOC_FAILURE); + return 0; + } + } + else + tmpin = dcont; + + + cmsbio=CMS_dataInit(cms, tmpin); + if (!cmsbio) + goto err; + + if (!cms_copy_content(out, cmsbio, flags)) + goto err; + + if (!(flags & CMS_NO_CONTENT_VERIFY)) + { + for (i = 0; i < sk_CMS_SignerInfo_num(sinfos); i++) + { + si = sk_CMS_SignerInfo_value(sinfos, i); + if (CMS_SignerInfo_verify_content(si, cmsbio) <= 0) + { + CMSerr(CMS_F_CMS_VERIFY, + CMS_R_CONTENT_VERIFY_ERROR); + goto err; + } + } + } + + ret = 1; + + err: + + if (dcont && (tmpin == dcont)) + do_free_upto(cmsbio, dcont); + else + BIO_free_all(cmsbio); + + if (cms_certs) + sk_X509_pop_free(cms_certs, X509_free); + if (crls) + sk_X509_CRL_pop_free(crls, X509_CRL_free); + + return ret; + } + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags) + { + int r; + flags &= ~(CMS_DETACHED|CMS_TEXT); + r = CMS_verify(rcms, certs, store, NULL, NULL, flags); + if (r <= 0) + return r; + return cms_Receipt_verify(rcms, ocms); + } + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, unsigned int flags) + { + CMS_ContentInfo *cms; + int i; + + cms = CMS_ContentInfo_new(); + if (!cms || !CMS_SignedData_init(cms)) + goto merr; + + if (pkey && !CMS_add1_signer(cms, signcert, pkey, NULL, flags)) + { + CMSerr(CMS_F_CMS_SIGN, CMS_R_ADD_SIGNER_ERROR); + goto err; + } + + for (i = 0; i < sk_X509_num(certs); i++) + { + X509 *x = sk_X509_value(certs, i); + if (!CMS_add1_cert(cms, x)) + goto merr; + } + + if(!(flags & CMS_DETACHED)) + CMS_set_detached(cms, 0); + + if ((flags & (CMS_STREAM|CMS_PARTIAL)) + || CMS_final(cms, data, NULL, flags)) + return cms; + else + goto err; + + merr: + CMSerr(CMS_F_CMS_SIGN, ERR_R_MALLOC_FAILURE); + + err: + if (cms) + CMS_ContentInfo_free(cms); + return NULL; + } + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, + unsigned int flags) + { + CMS_SignerInfo *rct_si; + CMS_ContentInfo *cms = NULL; + ASN1_OCTET_STRING **pos, *os; + BIO *rct_cont = NULL; + int r = 0; + + flags &= ~(CMS_STREAM|CMS_TEXT); + /* Not really detached but avoids content being allocated */ + flags |= CMS_PARTIAL|CMS_BINARY|CMS_DETACHED; + if (!pkey || !signcert) + { + CMSerr(CMS_F_CMS_SIGN_RECEIPT, CMS_R_NO_KEY_OR_CERT); + return NULL; + } + + /* Initialize signed data */ + + cms = CMS_sign(NULL, NULL, certs, NULL, flags); + if (!cms) + goto err; + + /* Set inner content type to signed receipt */ + if (!CMS_set1_eContentType(cms, OBJ_nid2obj(NID_id_smime_ct_receipt))) + goto err; + + rct_si = CMS_add1_signer(cms, signcert, pkey, NULL, flags); + if (!rct_si) + { + CMSerr(CMS_F_CMS_SIGN_RECEIPT, CMS_R_ADD_SIGNER_ERROR); + goto err; + } + + os = cms_encode_Receipt(si); + + if (!os) + goto err; + + /* Set content to digest */ + rct_cont = BIO_new_mem_buf(os->data, os->length); + if (!rct_cont) + goto err; + + /* Add msgSigDigest attribute */ + + if (!cms_msgSigDigest_add1(rct_si, si)) + goto err; + + /* Finalize structure */ + if (!CMS_final(cms, rct_cont, NULL, flags)) + goto err; + + /* Set embedded content */ + pos = CMS_get0_content(cms); + *pos = os; + + r = 1; + + err: + if (rct_cont) + BIO_free(rct_cont); + if (r) + return cms; + CMS_ContentInfo_free(cms); + return NULL; + + } + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *data, + const EVP_CIPHER *cipher, unsigned int flags) + { + CMS_ContentInfo *cms; + int i; + X509 *recip; + cms = CMS_EnvelopedData_create(cipher); + if (!cms) + goto merr; + for (i = 0; i < sk_X509_num(certs); i++) + { + recip = sk_X509_value(certs, i); + if (!CMS_add1_recipient_cert(cms, recip, flags)) + { + CMSerr(CMS_F_CMS_ENCRYPT, CMS_R_RECIPIENT_ERROR); + goto err; + } + } + + if(!(flags & CMS_DETACHED)) + CMS_set_detached(cms, 0); + + if ((flags & (CMS_STREAM|CMS_PARTIAL)) + || CMS_final(cms, data, NULL, flags)) + return cms; + else + goto err; + + merr: + CMSerr(CMS_F_CMS_ENCRYPT, ERR_R_MALLOC_FAILURE); + err: + if (cms) + CMS_ContentInfo_free(cms); + return NULL; + } + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) + { + STACK_OF(CMS_RecipientInfo) *ris; + CMS_RecipientInfo *ri; + int i, r; + int debug = 0; + ris = CMS_get0_RecipientInfos(cms); + if (ris) + debug = cms->d.envelopedData->encryptedContentInfo->debug; + for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) + { + ri = sk_CMS_RecipientInfo_value(ris, i); + if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS) + continue; + /* If we have a cert try matching RecipientInfo + * otherwise try them all. + */ + if (!cert || (CMS_RecipientInfo_ktri_cert_cmp(ri, cert) == 0)) + { + CMS_RecipientInfo_set0_pkey(ri, pk); + r = CMS_RecipientInfo_decrypt(cms, ri); + CMS_RecipientInfo_set0_pkey(ri, NULL); + if (cert) + { + /* If not debugging clear any error and + * return success to avoid leaking of + * information useful to MMA + */ + if (!debug) + { + ERR_clear_error(); + return 1; + } + if (r > 0) + return 1; + CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, + CMS_R_DECRYPT_ERROR); + return 0; + } + /* If no cert and not debugging don't leave loop + * after first successful decrypt. Always attempt + * to decrypt all recipients to avoid leaking timing + * of a successful decrypt. + */ + else if (r > 0 && debug) + return 1; + } + } + /* If no cert and not debugging always return success */ + if (!cert && !debug) + { + ERR_clear_error(); + return 1; + } + + CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, CMS_R_NO_MATCHING_RECIPIENT); + return 0; + + } + +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen) + { + STACK_OF(CMS_RecipientInfo) *ris; + CMS_RecipientInfo *ri; + int i, r; + ris = CMS_get0_RecipientInfos(cms); + for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) + { + ri = sk_CMS_RecipientInfo_value(ris, i); + if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_KEK) + continue; + + /* If we have an id try matching RecipientInfo + * otherwise try them all. + */ + if (!id || (CMS_RecipientInfo_kekri_id_cmp(ri, id, idlen) == 0)) + { + CMS_RecipientInfo_set0_key(ri, key, keylen); + r = CMS_RecipientInfo_decrypt(cms, ri); + CMS_RecipientInfo_set0_key(ri, NULL, 0); + if (r > 0) + return 1; + if (id) + { + CMSerr(CMS_F_CMS_DECRYPT_SET1_KEY, + CMS_R_DECRYPT_ERROR); + return 0; + } + ERR_clear_error(); + } + } + + CMSerr(CMS_F_CMS_DECRYPT_SET1_KEY, CMS_R_NO_MATCHING_RECIPIENT); + return 0; + + } + +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen) + { + STACK_OF(CMS_RecipientInfo) *ris; + CMS_RecipientInfo *ri; + int i, r; + ris = CMS_get0_RecipientInfos(cms); + for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) + { + ri = sk_CMS_RecipientInfo_value(ris, i); + if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_PASS) + continue; + CMS_RecipientInfo_set0_password(ri, pass, passlen); + r = CMS_RecipientInfo_decrypt(cms, ri); + CMS_RecipientInfo_set0_password(ri, NULL, 0); + if (r > 0) + return 1; + } + + CMSerr(CMS_F_CMS_DECRYPT_SET1_PASSWORD, CMS_R_NO_MATCHING_RECIPIENT); + return 0; + + } + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert, + BIO *dcont, BIO *out, + unsigned int flags) + { + int r; + BIO *cont; + if (OBJ_obj2nid(CMS_get0_type(cms)) != NID_pkcs7_enveloped) + { + CMSerr(CMS_F_CMS_DECRYPT, CMS_R_TYPE_NOT_ENVELOPED_DATA); + return 0; + } + if (!dcont && !check_content(cms)) + return 0; + if (flags & CMS_DEBUG_DECRYPT) + cms->d.envelopedData->encryptedContentInfo->debug = 1; + else + cms->d.envelopedData->encryptedContentInfo->debug = 0; + if (!pk && !cert && !dcont && !out) + return 1; + if (pk && !CMS_decrypt_set1_pkey(cms, pk, cert)) + return 0; + cont = CMS_dataInit(cms, dcont); + if (!cont) + return 0; + r = cms_copy_content(out, cont, flags); + do_free_upto(cont, dcont); + return r; + } + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, unsigned int flags) + { + BIO *cmsbio; + int ret = 0; + if (!(cmsbio = CMS_dataInit(cms, dcont))) + { + CMSerr(CMS_F_CMS_FINAL,ERR_R_MALLOC_FAILURE); + return 0; + } + + SMIME_crlf_copy(data, cmsbio, flags); + + (void)BIO_flush(cmsbio); + + + if (!CMS_dataFinal(cms, cmsbio)) + { + CMSerr(CMS_F_CMS_FINAL,CMS_R_CMS_DATAFINAL_ERROR); + goto err; + } + + ret = 1; + + err: + do_free_upto(cmsbio, dcont); + + return ret; + + } + +#ifdef ZLIB + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags) + { + BIO *cont; + int r; + if (OBJ_obj2nid(CMS_get0_type(cms)) != NID_id_smime_ct_compressedData) + { + CMSerr(CMS_F_CMS_UNCOMPRESS, + CMS_R_TYPE_NOT_COMPRESSED_DATA); + return 0; + } + + if (!dcont && !check_content(cms)) + return 0; + + cont = CMS_dataInit(cms, dcont); + if (!cont) + return 0; + r = cms_copy_content(out, cont, flags); + do_free_upto(cont, dcont); + return r; + } + +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags) + { + CMS_ContentInfo *cms; + if (comp_nid <= 0) + comp_nid = NID_zlib_compression; + cms = cms_CompressedData_create(comp_nid); + if (!cms) + return NULL; + + if(!(flags & CMS_DETACHED)) + CMS_set_detached(cms, 0); + + if ((flags & CMS_STREAM) || CMS_final(cms, in, NULL, flags)) + return cms; + + CMS_ContentInfo_free(cms); + return NULL; + } + +#else + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags) + { + CMSerr(CMS_F_CMS_UNCOMPRESS, CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM); + return 0; + } + +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags) + { + CMSerr(CMS_F_CMS_COMPRESS, CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM); + return NULL; + } + +#endif diff --git a/app/openssl/crypto/comp/c_rle.c b/app/openssl/crypto/comp/c_rle.c index 18bceae5..47dfb67f 100644 --- a/app/openssl/crypto/comp/c_rle.c +++ b/app/openssl/crypto/comp/c_rle.c @@ -30,7 +30,7 @@ static int rle_compress_block(COMP_CTX *ctx, unsigned char *out, { /* int i; */ - if (olen < (ilen+1)) + if (ilen == 0 || olen < (ilen-1)) { /* ZZZZZZZZZZZZZZZZZZZZZZ */ return(-1); @@ -46,7 +46,7 @@ static int rle_expand_block(COMP_CTX *ctx, unsigned char *out, { int i; - if (ilen == 0 || olen < (ilen-1)) + if (olen < (ilen-1)) { /* ZZZZZZZZZZZZZZZZZZZZZZ */ return(-1); diff --git a/app/openssl/crypto/conf/conf_mall.c b/app/openssl/crypto/conf/conf_mall.c index c6f4cb2d..213890e0 100644 --- a/app/openssl/crypto/conf/conf_mall.c +++ b/app/openssl/crypto/conf/conf_mall.c @@ -76,5 +76,6 @@ void OPENSSL_load_builtin_modules(void) #ifndef OPENSSL_NO_ENGINE ENGINE_add_conf_module(); #endif + EVP_add_alg_module(); } diff --git a/app/openssl/crypto/cpt_err.c b/app/openssl/crypto/cpt_err.c index 139b9284..289005f6 100644 --- a/app/openssl/crypto/cpt_err.c +++ b/app/openssl/crypto/cpt_err.c @@ -1,6 +1,6 @@ /* crypto/cpt_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -76,6 +76,7 @@ static ERR_STRING_DATA CRYPTO_str_functs[]= {ERR_FUNC(CRYPTO_F_CRYPTO_SET_EX_DATA), "CRYPTO_set_ex_data"}, {ERR_FUNC(CRYPTO_F_DEF_ADD_INDEX), "DEF_ADD_INDEX"}, {ERR_FUNC(CRYPTO_F_DEF_GET_CLASS), "DEF_GET_CLASS"}, +{ERR_FUNC(CRYPTO_F_FIPS_MODE_SET), "FIPS_mode_set"}, {ERR_FUNC(CRYPTO_F_INT_DUP_EX_DATA), "INT_DUP_EX_DATA"}, {ERR_FUNC(CRYPTO_F_INT_FREE_EX_DATA), "INT_FREE_EX_DATA"}, {ERR_FUNC(CRYPTO_F_INT_NEW_EX_DATA), "INT_NEW_EX_DATA"}, @@ -84,6 +85,7 @@ static ERR_STRING_DATA CRYPTO_str_functs[]= static ERR_STRING_DATA CRYPTO_str_reasons[]= { +{ERR_REASON(CRYPTO_R_FIPS_MODE_NOT_SUPPORTED),"fips mode not supported"}, {ERR_REASON(CRYPTO_R_NO_DYNLOCK_CREATE_CALLBACK),"no dynlock create callback"}, {0,NULL} }; diff --git a/app/openssl/crypto/cryptlib.c b/app/openssl/crypto/cryptlib.c index 24fe123e..0b77d8b7 100644 --- a/app/openssl/crypto/cryptlib.c +++ b/app/openssl/crypto/cryptlib.c @@ -409,6 +409,10 @@ int (*CRYPTO_get_add_lock_callback(void))(int *num,int mount,int type, void CRYPTO_set_locking_callback(void (*func)(int mode,int type, const char *file,int line)) { + /* Calling this here ensures initialisation before any threads + * are started. + */ + OPENSSL_init(); locking_callback=func; } @@ -500,7 +504,7 @@ void CRYPTO_THREADID_current(CRYPTO_THREADID *id) CRYPTO_THREADID_set_numeric(id, (unsigned long)find_thread(NULL)); #else /* For everything else, default to using the address of 'errno' */ - CRYPTO_THREADID_set_pointer(id, &errno); + CRYPTO_THREADID_set_pointer(id, (void*)&errno); #endif } @@ -661,28 +665,53 @@ const char *CRYPTO_get_lock_name(int type) defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -unsigned long OPENSSL_ia32cap_P=0; -unsigned long *OPENSSL_ia32cap_loc(void) { return &OPENSSL_ia32cap_P; } +unsigned int OPENSSL_ia32cap_P[2]; +unsigned long *OPENSSL_ia32cap_loc(void) +{ if (sizeof(long)==4) + /* + * If 32-bit application pulls address of OPENSSL_ia32cap_P[0] + * clear second element to maintain the illusion that vector + * is 32-bit. + */ + OPENSSL_ia32cap_P[1]=0; + return (unsigned long *)OPENSSL_ia32cap_P; +} #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) #define OPENSSL_CPUID_SETUP +#if defined(_WIN32) +typedef unsigned __int64 IA32CAP; +#else +typedef unsigned long long IA32CAP; +#endif void OPENSSL_cpuid_setup(void) { static int trigger=0; - unsigned long OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP vec; char *env; if (trigger) return; trigger=1; - if ((env=getenv("OPENSSL_ia32cap"))) - OPENSSL_ia32cap_P = strtoul(env,NULL,0)|(1<<10); + if ((env=getenv("OPENSSL_ia32cap"))) { + int off = (env[0]=='~')?1:0; +#if defined(_WIN32) + if (!sscanf(env+off,"%I64i",&vec)) vec = strtoul(env+off,NULL,0); +#else + if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); +#endif + if (off) vec = OPENSSL_ia32_cpuid()&~vec; + } else - OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid()|(1<<10); + vec = OPENSSL_ia32_cpuid(); + /* * |(1<<10) sets a reserved bit to signal that variable * was initialized already... This is to avoid interference * with cpuid snippets in ELF .init segment. */ + OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); + OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); } #endif @@ -860,7 +889,7 @@ void OPENSSL_showfatal (const char *fmta,...) #if defined(_WIN32_WINNT) && _WIN32_WINNT>=0x0333 /* this -------------v--- guards NT-specific calls */ - if (GetVersion() < 0x80000000 && OPENSSL_isservice() > 0) + if (check_winnt() && OPENSSL_isservice() > 0) { HANDLE h = RegisterEventSource(0,_T("OPENSSL")); const TCHAR *pmsg=buf; ReportEvent(h,EVENTLOG_ERROR_TYPE,0,0,0,1,0,&pmsg,0); @@ -896,3 +925,16 @@ void OpenSSLDie(const char *file,int line,const char *assertion) } void *OPENSSL_stderr(void) { return stderr; } + +int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len) + { + size_t i; + const unsigned char *a = in_a; + const unsigned char *b = in_b; + unsigned char x = 0; + + for (i = 0; i < len; i++) + x |= a[i] ^ b[i]; + + return x; + } diff --git a/app/openssl/crypto/cryptlib.h b/app/openssl/crypto/cryptlib.h index fc249c57..d26f9630 100644 --- a/app/openssl/crypto/cryptlib.h +++ b/app/openssl/crypto/cryptlib.h @@ -99,8 +99,8 @@ extern "C" { #define HEX_SIZE(type) (sizeof(type)*2) void OPENSSL_cpuid_setup(void); -extern unsigned long OPENSSL_ia32cap_P; -void OPENSSL_showfatal(const char *,...); +extern unsigned int OPENSSL_ia32cap_P[]; +void OPENSSL_showfatal(const char *fmta,...); void *OPENSSL_stderr(void); extern int OPENSSL_NONPIC_relocated; diff --git a/app/openssl/crypto/crypto.h b/app/openssl/crypto/crypto.h index b0360cec..f92fc518 100644 --- a/app/openssl/crypto/crypto.h +++ b/app/openssl/crypto/crypto.h @@ -488,10 +488,10 @@ void CRYPTO_get_mem_debug_functions(void (**m)(void *,int,const char *,int,int), long (**go)(void)); void *CRYPTO_malloc_locked(int num, const char *file, int line); -void CRYPTO_free_locked(void *); +void CRYPTO_free_locked(void *ptr); void *CRYPTO_malloc(int num, const char *file, int line); char *CRYPTO_strdup(const char *str, const char *file, int line); -void CRYPTO_free(void *); +void CRYPTO_free(void *ptr); void *CRYPTO_realloc(void *addr,int num, const char *file, int line); void *CRYPTO_realloc_clean(void *addr,int old_num,int num,const char *file, int line); @@ -547,6 +547,40 @@ unsigned long *OPENSSL_ia32cap_loc(void); #define OPENSSL_ia32cap (*(OPENSSL_ia32cap_loc())) int OPENSSL_isservice(void); +int FIPS_mode(void); +int FIPS_mode_set(int r); + +void OPENSSL_init(void); + +#define fips_md_init(alg) fips_md_init_ctx(alg, alg) + +#ifdef OPENSSL_FIPS +#define fips_md_init_ctx(alg, cx) \ + int alg##_Init(cx##_CTX *c) \ + { \ + if (FIPS_mode()) OpenSSLDie(__FILE__, __LINE__, \ + "Low level API call to digest " #alg " forbidden in FIPS mode!"); \ + return private_##alg##_Init(c); \ + } \ + int private_##alg##_Init(cx##_CTX *c) + +#define fips_cipher_abort(alg) \ + if (FIPS_mode()) OpenSSLDie(__FILE__, __LINE__, \ + "Low level API call to cipher " #alg " forbidden in FIPS mode!") + +#else +#define fips_md_init_ctx(alg, cx) \ + int alg##_Init(cx##_CTX *c) +#define fips_cipher_abort(alg) while(0) +#endif + +/* CRYPTO_memcmp returns zero iff the |len| bytes at |a| and |b| are equal. It + * takes an amount of time dependent on |len|, but independent of the contents + * of |a| and |b|. Unlike memcmp, it cannot be used to put elements into a + * defined order as the return value when a != b is undefined, other than to be + * non-zero. */ +int CRYPTO_memcmp(const void *a, const void *b, size_t len); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -562,11 +596,13 @@ void ERR_load_CRYPTO_strings(void); #define CRYPTO_F_CRYPTO_SET_EX_DATA 102 #define CRYPTO_F_DEF_ADD_INDEX 104 #define CRYPTO_F_DEF_GET_CLASS 105 +#define CRYPTO_F_FIPS_MODE_SET 109 #define CRYPTO_F_INT_DUP_EX_DATA 106 #define CRYPTO_F_INT_FREE_EX_DATA 107 #define CRYPTO_F_INT_NEW_EX_DATA 108 /* Reason codes. */ +#define CRYPTO_R_FIPS_MODE_NOT_SUPPORTED 101 #define CRYPTO_R_NO_DYNLOCK_CREATE_CALLBACK 100 #ifdef __cplusplus diff --git a/app/openssl/crypto/des/asm/crypt586.S b/app/openssl/crypto/des/asm/crypt586.S new file mode 100644 index 00000000..fb321ba9 --- /dev/null +++ b/app/openssl/crypto/des/asm/crypt586.S @@ -0,0 +1,879 @@ +.file "crypt586.s" +.text +.globl fcrypt_body +.type fcrypt_body,@function +.align 16 +fcrypt_body: +.L_fcrypt_body_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + xorl %edi,%edi + xorl %esi,%esi + call .L000PIC_me_up +.L000PIC_me_up: + popl %edx + leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx + movl DES_SPtrans@GOT(%edx),%edx + pushl %edx + movl 28(%esp),%ebp + pushl $25 +.L001start: + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl (%ebp),%ebx + xorl %ebx,%eax + movl 4(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 8(%ebp),%ebx + xorl %ebx,%eax + movl 12(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 16(%ebp),%ebx + xorl %ebx,%eax + movl 20(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 24(%ebp),%ebx + xorl %ebx,%eax + movl 28(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 32(%ebp),%ebx + xorl %ebx,%eax + movl 36(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 40(%ebp),%ebx + xorl %ebx,%eax + movl 44(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 48(%ebp),%ebx + xorl %ebx,%eax + movl 52(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 56(%ebp),%ebx + xorl %ebx,%eax + movl 60(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 64(%ebp),%ebx + xorl %ebx,%eax + movl 68(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 72(%ebp),%ebx + xorl %ebx,%eax + movl 76(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 80(%ebp),%ebx + xorl %ebx,%eax + movl 84(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 88(%ebp),%ebx + xorl %ebx,%eax + movl 92(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 96(%ebp),%ebx + xorl %ebx,%eax + movl 100(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 104(%ebp),%ebx + xorl %ebx,%eax + movl 108(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 112(%ebp),%ebx + xorl %ebx,%eax + movl 116(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 120(%ebp),%ebx + xorl %ebx,%eax + movl 124(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + movl (%esp),%ebx + movl %edi,%eax + decl %ebx + movl %esi,%edi + movl %eax,%esi + movl %ebx,(%esp) + jnz .L001start + + + movl 28(%esp),%edx + rorl $1,%edi + movl %esi,%eax + xorl %edi,%esi + andl $0xaaaaaaaa,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $23,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $10,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x33333333,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $18,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xfff0000f,%edi + xorl %edi,%esi + xorl %edi,%eax + + roll $12,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xf0f0f0f0,%esi + xorl %esi,%edi + xorl %esi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %edi,4(%edx) + addl $8,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size fcrypt_body,.-.L_fcrypt_body_begin diff --git a/app/openssl/crypto/des/asm/des-586.S b/app/openssl/crypto/des/asm/des-586.S new file mode 100644 index 00000000..2fbd340d --- /dev/null +++ b/app/openssl/crypto/des/asm/des-586.S @@ -0,0 +1,1837 @@ +.file "des-586.s" +.text +.globl DES_SPtrans +.type _x86_DES_encrypt,@function +.align 16 +_x86_DES_encrypt: + pushl %ecx + + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 40(%ecx),%eax + xorl %ebx,%ebx + movl 44(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.size _x86_DES_encrypt,.-_x86_DES_encrypt +.type _x86_DES_decrypt,@function +.align 16 +_x86_DES_decrypt: + pushl %ecx + + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 40(%ecx),%eax + xorl %ebx,%ebx + movl 44(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.size _x86_DES_decrypt,.-_x86_DES_decrypt +.globl DES_encrypt1 +.type DES_encrypt1,@function +.align 16 +DES_encrypt1: +.L_DES_encrypt1_begin: + pushl %esi + pushl %edi + + + movl 12(%esp),%esi + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%esi),%eax + movl 28(%esp),%ebx + movl 4(%esi),%edi + + + roll $4,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xf0f0f0f0,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $20,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xfff0000f,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $14,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x33333333,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $22,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x03fc03fc,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $9,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xaaaaaaaa,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $1,%edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal DES_SPtrans-.L000pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je .L001decrypt + call _x86_DES_encrypt + jmp .L002done +.L001decrypt: + call _x86_DES_decrypt +.L002done: + + + movl 20(%esp),%edx + rorl $1,%esi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %esi,4(%edx) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.size DES_encrypt1,.-.L_DES_encrypt1_begin +.globl DES_encrypt2 +.type DES_encrypt2,@function +.align 16 +DES_encrypt2: +.L_DES_encrypt2_begin: + pushl %esi + pushl %edi + + + movl 12(%esp),%eax + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%eax),%esi + movl 28(%esp),%ebx + roll $3,%esi + movl 4(%eax),%edi + roll $3,%edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal DES_SPtrans-.L003pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je .L004decrypt + call _x86_DES_encrypt + jmp .L005done +.L004decrypt: + call _x86_DES_decrypt +.L005done: + + + rorl $3,%edi + movl 20(%esp),%eax + rorl $3,%esi + movl %edi,(%eax) + movl %esi,4(%eax) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.size DES_encrypt2,.-.L_DES_encrypt2_begin +.globl DES_encrypt3 +.type DES_encrypt3,@function +.align 16 +DES_encrypt3: +.L_DES_encrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl %esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%eax + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%esi + movl $1,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $0,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $1,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.size DES_encrypt3,.-.L_DES_encrypt3_begin +.globl DES_decrypt3 +.type DES_decrypt3,@function +.align 16 +DES_decrypt3: +.L_DES_decrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl %esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%esi + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%eax + movl $0,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $1,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $0,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.size DES_decrypt3,.-.L_DES_decrypt3_begin +.globl DES_ncbc_encrypt +.type DES_ncbc_encrypt,@function +.align 16 +DES_ncbc_encrypt: +.L_DES_ncbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + pushl %ecx + + movl 52(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L006decrypt + andl $4294967288,%ebp + movl 12(%esp),%eax + movl 16(%esp),%ebx + jz .L007encrypt_finish +.L008encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L008encrypt_loop +.L007encrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz .L009finish + call .L010PIC_point +.L010PIC_point: + popl %edx + leal .L011cbc_enc_jmp_table-.L010PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L012ej7: + movb 6(%esi),%dh + shll $8,%edx +.L013ej6: + movb 5(%esi),%dh +.L014ej5: + movb 4(%esi),%dl +.L015ej4: + movl (%esi),%ecx + jmp .L016ejend +.L017ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L018ej2: + movb 1(%esi),%ch +.L019ej1: + movb (%esi),%cl +.L016ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L009finish +.L006decrypt: + andl $4294967288,%ebp + movl 20(%esp),%eax + movl 24(%esp),%ebx + jz .L020decrypt_finish +.L021decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,20(%esp) + movl %ebx,24(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L021decrypt_loop +.L020decrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz .L009finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L022dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L023dj6: + movb %dh,5(%edi) +.L024dj5: + movb %dl,4(%edi) +.L025dj4: + movl %ecx,(%edi) + jmp .L026djend +.L027dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L028dj2: + movb %ch,1(%esi) +.L029dj1: + movb %cl,(%esi) +.L026djend: + jmp .L009finish +.L009finish: + movl 64(%esp),%ecx + addl $28,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L011cbc_enc_jmp_table: +.long 0 +.long .L019ej1-.L010PIC_point +.long .L018ej2-.L010PIC_point +.long .L017ej3-.L010PIC_point +.long .L015ej4-.L010PIC_point +.long .L014ej5-.L010PIC_point +.long .L013ej6-.L010PIC_point +.long .L012ej7-.L010PIC_point +.align 64 +.size DES_ncbc_encrypt,.-.L_DES_ncbc_encrypt_begin +.globl DES_ede3_cbc_encrypt +.type DES_ede3_cbc_encrypt,@function +.align 16 +DES_ede3_cbc_encrypt: +.L_DES_ede3_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 44(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 64(%esp),%ecx + + movl 56(%esp),%eax + pushl %eax + + movl 56(%esp),%eax + pushl %eax + + movl 56(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L030decrypt + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L031encrypt_finish +.L032encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L032encrypt_loop +.L031encrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz .L033finish + call .L034PIC_point +.L034PIC_point: + popl %edx + leal .L035cbc_enc_jmp_table-.L034PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L036ej7: + movb 6(%esi),%dh + shll $8,%edx +.L037ej6: + movb 5(%esi),%dh +.L038ej5: + movb 4(%esi),%dl +.L039ej4: + movl (%esi),%ecx + jmp .L040ejend +.L041ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L042ej2: + movb 1(%esi),%ch +.L043ej1: + movb (%esi),%cl +.L040ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L033finish +.L030decrypt: + andl $4294967288,%ebp + movl 24(%esp),%eax + movl 28(%esp),%ebx + jz .L044decrypt_finish +.L045decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,24(%esp) + movl %ebx,28(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L045decrypt_loop +.L044decrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz .L033finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L046dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L047dj6: + movb %dh,5(%edi) +.L048dj5: + movb %dl,4(%edi) +.L049dj4: + movl %ecx,(%edi) + jmp .L050djend +.L051dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L052dj2: + movb %ch,1(%esi) +.L053dj1: + movb %cl,(%esi) +.L050djend: + jmp .L033finish +.L033finish: + movl 76(%esp),%ecx + addl $32,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L035cbc_enc_jmp_table: +.long 0 +.long .L043ej1-.L034PIC_point +.long .L042ej2-.L034PIC_point +.long .L041ej3-.L034PIC_point +.long .L039ej4-.L034PIC_point +.long .L038ej5-.L034PIC_point +.long .L037ej6-.L034PIC_point +.long .L036ej7-.L034PIC_point +.align 64 +.size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin +.align 64 +DES_SPtrans: +.long 34080768,524288,33554434,34080770 +.long 33554432,526338,524290,33554434 +.long 526338,34080768,34078720,2050 +.long 33556482,33554432,0,524290 +.long 524288,2,33556480,526336 +.long 34080770,34078720,2050,33556480 +.long 2,2048,526336,34078722 +.long 2048,33556482,34078722,0 +.long 0,34080770,33556480,524290 +.long 34080768,524288,2050,33556480 +.long 34078722,2048,526336,33554434 +.long 526338,2,33554434,34078720 +.long 34080770,526336,34078720,33556482 +.long 33554432,2050,524290,0 +.long 524288,33554432,33556482,34080768 +.long 2,34078722,2048,526338 +.long 1074823184,0,1081344,1074790400 +.long 1073741840,32784,1073774592,1081344 +.long 32768,1074790416,16,1073774592 +.long 1048592,1074823168,1074790400,16 +.long 1048576,1073774608,1074790416,32768 +.long 1081360,1073741824,0,1048592 +.long 1073774608,1081360,1074823168,1073741840 +.long 1073741824,1048576,32784,1074823184 +.long 1048592,1074823168,1073774592,1081360 +.long 1074823184,1048592,1073741840,0 +.long 1073741824,32784,1048576,1074790416 +.long 32768,1073741824,1081360,1073774608 +.long 1074823168,32768,0,1073741840 +.long 16,1074823184,1081344,1074790400 +.long 1074790416,1048576,32784,1073774592 +.long 1073774608,16,1074790400,1081344 +.long 67108865,67371264,256,67109121 +.long 262145,67108864,67109121,262400 +.long 67109120,262144,67371008,1 +.long 67371265,257,1,67371009 +.long 0,262145,67371264,256 +.long 257,67371265,262144,67108865 +.long 67371009,67109120,262401,67371008 +.long 262400,0,67108864,262401 +.long 67371264,256,1,262144 +.long 257,262145,67371008,67109121 +.long 0,67371264,262400,67371009 +.long 262145,67108864,67371265,1 +.long 262401,67108865,67108864,67371265 +.long 262144,67109120,67109121,262400 +.long 67109120,0,67371009,257 +.long 67108865,262401,256,67371008 +.long 4198408,268439552,8,272633864 +.long 0,272629760,268439560,4194312 +.long 272633856,268435464,268435456,4104 +.long 268435464,4198408,4194304,268435456 +.long 272629768,4198400,4096,8 +.long 4198400,268439560,272629760,4096 +.long 4104,0,4194312,272633856 +.long 268439552,272629768,272633864,4194304 +.long 272629768,4104,4194304,268435464 +.long 4198400,268439552,8,272629760 +.long 268439560,0,4096,4194312 +.long 0,272629768,272633856,4096 +.long 268435456,272633864,4198408,4194304 +.long 272633864,8,268439552,4198408 +.long 4194312,4198400,272629760,268439560 +.long 4104,268435456,268435464,272633856 +.long 134217728,65536,1024,134284320 +.long 134283296,134218752,66592,134283264 +.long 65536,32,134217760,66560 +.long 134218784,134283296,134284288,0 +.long 66560,134217728,65568,1056 +.long 134218752,66592,0,134217760 +.long 32,134218784,134284320,65568 +.long 134283264,1024,1056,134284288 +.long 134284288,134218784,65568,134283264 +.long 65536,32,134217760,134218752 +.long 134217728,66560,134284320,0 +.long 66592,134217728,1024,65568 +.long 134218784,1024,0,134284320 +.long 134283296,134284288,1056,65536 +.long 66560,134283296,134218752,1056 +.long 32,66592,134283264,134217760 +.long 2147483712,2097216,0,2149588992 +.long 2097216,8192,2147491904,2097152 +.long 8256,2149589056,2105344,2147483648 +.long 2147491840,2147483712,2149580800,2105408 +.long 2097152,2147491904,2149580864,0 +.long 8192,64,2149588992,2149580864 +.long 2149589056,2149580800,2147483648,8256 +.long 64,2105344,2105408,2147491840 +.long 8256,2147483648,2147491840,2105408 +.long 2149588992,2097216,0,2147491840 +.long 2147483648,8192,2149580864,2097152 +.long 2097216,2149589056,2105344,64 +.long 2149589056,2105344,2097152,2147491904 +.long 2147483712,2149580800,2105408,0 +.long 8192,2147483712,2147491904,2149588992 +.long 2149580800,8256,64,2149580864 +.long 16384,512,16777728,16777220 +.long 16794116,16388,16896,0 +.long 16777216,16777732,516,16793600 +.long 4,16794112,16793600,516 +.long 16777732,16384,16388,16794116 +.long 0,16777728,16777220,16896 +.long 16793604,16900,16794112,4 +.long 16900,16793604,512,16777216 +.long 16900,16793600,16793604,516 +.long 16384,512,16777216,16793604 +.long 16777732,16900,16896,0 +.long 512,16777220,4,16777728 +.long 0,16777732,16777728,16896 +.long 516,16384,16794116,16777216 +.long 16794112,4,16388,16794116 +.long 16777220,16794112,16793600,16388 +.long 545259648,545390592,131200,0 +.long 537001984,8388736,545259520,545390720 +.long 128,536870912,8519680,131200 +.long 8519808,537002112,536871040,545259520 +.long 131072,8519808,8388736,537001984 +.long 545390720,536871040,0,8519680 +.long 536870912,8388608,537002112,545259648 +.long 8388608,131072,545390592,128 +.long 8388608,131072,536871040,545390720 +.long 131200,536870912,0,8519680 +.long 545259648,537002112,537001984,8388736 +.long 545390592,128,8388736,537001984 +.long 545390720,8388608,545259520,536871040 +.long 8519680,131200,537002112,545259520 +.long 128,545390592,8519808,0 +.long 536870912,545259648,131072,8519808 diff --git a/app/openssl/crypto/des/des.h b/app/openssl/crypto/des/des.h index 92b66635..1eaedcbd 100644 --- a/app/openssl/crypto/des/des.h +++ b/app/openssl/crypto/des/des.h @@ -224,6 +224,9 @@ int DES_set_key(const_DES_cblock *key,DES_key_schedule *schedule); int DES_key_sched(const_DES_cblock *key,DES_key_schedule *schedule); int DES_set_key_checked(const_DES_cblock *key,DES_key_schedule *schedule); void DES_set_key_unchecked(const_DES_cblock *key,DES_key_schedule *schedule); +#ifdef OPENSSL_FIPS +void private_DES_set_key_unchecked(const_DES_cblock *key,DES_key_schedule *schedule); +#endif void DES_string_to_key(const char *str,DES_cblock *key); void DES_string_to_2keys(const char *str,DES_cblock *key1,DES_cblock *key2); void DES_cfb64_encrypt(const unsigned char *in,unsigned char *out,long length, diff --git a/app/openssl/crypto/des/set_key.c b/app/openssl/crypto/des/set_key.c index 3004cc3a..da4d62e1 100644 --- a/app/openssl/crypto/des/set_key.c +++ b/app/openssl/crypto/des/set_key.c @@ -63,6 +63,7 @@ * 1.1 added norm_expand_bits * 1.0 First working version */ +#include #include "des_locl.h" OPENSSL_IMPLEMENT_GLOBAL(int,DES_check_key,0) /* defaults to false */ @@ -335,6 +336,13 @@ int DES_set_key_checked(const_DES_cblock *key, DES_key_schedule *schedule) } void DES_set_key_unchecked(const_DES_cblock *key, DES_key_schedule *schedule) +#ifdef OPENSSL_FIPS + { + fips_cipher_abort(DES); + private_DES_set_key_unchecked(key, schedule); + } +void private_DES_set_key_unchecked(const_DES_cblock *key, DES_key_schedule *schedule) +#endif { static const int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; register DES_LONG c,d,t,s,t2; diff --git a/app/openssl/crypto/des/str2key.c b/app/openssl/crypto/des/str2key.c index 9c2054bd..1077f99d 100644 --- a/app/openssl/crypto/des/str2key.c +++ b/app/openssl/crypto/des/str2key.c @@ -56,8 +56,8 @@ * [including the GNU Public Licence.] */ -#include "des_locl.h" #include +#include "des_locl.h" void DES_string_to_key(const char *str, DES_cblock *key) { diff --git a/app/openssl/crypto/dh/dh.h b/app/openssl/crypto/dh/dh.h index 849309a4..ea59e610 100644 --- a/app/openssl/crypto/dh/dh.h +++ b/app/openssl/crypto/dh/dh.h @@ -86,6 +86,21 @@ * be used for all exponents. */ +/* If this flag is set the DH method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its reposibility + * to ensure the result is compliant. + */ + +#define DH_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define DH_FLAG_NON_FIPS_ALLOW 0x0400 + #ifdef __cplusplus extern "C" { #endif @@ -230,6 +245,9 @@ void ERR_load_DH_strings(void); #define DH_F_COMPUTE_KEY 102 #define DH_F_DHPARAMS_PRINT_FP 101 #define DH_F_DH_BUILTIN_GENPARAMS 106 +#define DH_F_DH_COMPUTE_KEY 114 +#define DH_F_DH_GENERATE_KEY 115 +#define DH_F_DH_GENERATE_PARAMETERS_EX 116 #define DH_F_DH_NEW_METHOD 105 #define DH_F_DH_PARAM_DECODE 107 #define DH_F_DH_PRIV_DECODE 110 @@ -249,7 +267,9 @@ void ERR_load_DH_strings(void); #define DH_R_DECODE_ERROR 104 #define DH_R_INVALID_PUBKEY 102 #define DH_R_KEYS_NOT_SET 108 +#define DH_R_KEY_SIZE_TOO_SMALL 110 #define DH_R_MODULUS_TOO_LARGE 103 +#define DH_R_NON_FIPS_METHOD 111 #define DH_R_NO_PARAMETERS_SET 107 #define DH_R_NO_PRIVATE_VALUE 100 #define DH_R_PARAMETER_ENCODING_ERROR 105 diff --git a/app/openssl/crypto/dh/dh_ameth.c b/app/openssl/crypto/dh/dh_ameth.c index 377caf96..02ec2d47 100644 --- a/app/openssl/crypto/dh/dh_ameth.c +++ b/app/openssl/crypto/dh/dh_ameth.c @@ -493,6 +493,7 @@ const EVP_PKEY_ASN1_METHOD dh_asn1_meth = dh_copy_parameters, dh_cmp_parameters, dh_param_print, + 0, int_dh_free, 0 diff --git a/app/openssl/crypto/dh/dh_err.c b/app/openssl/crypto/dh/dh_err.c index d5cf0c22..56d3df73 100644 --- a/app/openssl/crypto/dh/dh_err.c +++ b/app/openssl/crypto/dh/dh_err.c @@ -1,6 +1,6 @@ /* crypto/dh/dh_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,6 +73,9 @@ static ERR_STRING_DATA DH_str_functs[]= {ERR_FUNC(DH_F_COMPUTE_KEY), "COMPUTE_KEY"}, {ERR_FUNC(DH_F_DHPARAMS_PRINT_FP), "DHparams_print_fp"}, {ERR_FUNC(DH_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"}, +{ERR_FUNC(DH_F_DH_COMPUTE_KEY), "DH_compute_key"}, +{ERR_FUNC(DH_F_DH_GENERATE_KEY), "DH_generate_key"}, +{ERR_FUNC(DH_F_DH_GENERATE_PARAMETERS_EX), "DH_generate_parameters_ex"}, {ERR_FUNC(DH_F_DH_NEW_METHOD), "DH_new_method"}, {ERR_FUNC(DH_F_DH_PARAM_DECODE), "DH_PARAM_DECODE"}, {ERR_FUNC(DH_F_DH_PRIV_DECODE), "DH_PRIV_DECODE"}, @@ -95,7 +98,9 @@ static ERR_STRING_DATA DH_str_reasons[]= {ERR_REASON(DH_R_DECODE_ERROR) ,"decode error"}, {ERR_REASON(DH_R_INVALID_PUBKEY) ,"invalid public key"}, {ERR_REASON(DH_R_KEYS_NOT_SET) ,"keys not set"}, +{ERR_REASON(DH_R_KEY_SIZE_TOO_SMALL) ,"key size too small"}, {ERR_REASON(DH_R_MODULUS_TOO_LARGE) ,"modulus too large"}, +{ERR_REASON(DH_R_NON_FIPS_METHOD) ,"non fips method"}, {ERR_REASON(DH_R_NO_PARAMETERS_SET) ,"no parameters set"}, {ERR_REASON(DH_R_NO_PRIVATE_VALUE) ,"no private value"}, {ERR_REASON(DH_R_PARAMETER_ENCODING_ERROR),"parameter encoding error"}, diff --git a/app/openssl/crypto/dh/dh_gen.c b/app/openssl/crypto/dh/dh_gen.c index cfd5b118..7b1fe9c9 100644 --- a/app/openssl/crypto/dh/dh_gen.c +++ b/app/openssl/crypto/dh/dh_gen.c @@ -66,12 +66,29 @@ #include #include +#ifdef OPENSSL_FIPS +#include +#endif + static int dh_builtin_genparams(DH *ret, int prime_len, int generator, BN_GENCB *cb); int DH_generate_parameters_ex(DH *ret, int prime_len, int generator, BN_GENCB *cb) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(ret->meth->flags & DH_FLAG_FIPS_METHOD) + && !(ret->flags & DH_FLAG_NON_FIPS_ALLOW)) + { + DHerr(DH_F_DH_GENERATE_PARAMETERS_EX, DH_R_NON_FIPS_METHOD); + return 0; + } +#endif if(ret->meth->generate_params) return ret->meth->generate_params(ret, prime_len, generator, cb); +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_dh_generate_parameters_ex(ret, prime_len, + generator, cb); +#endif return dh_builtin_genparams(ret, prime_len, generator, cb); } diff --git a/app/openssl/crypto/dh/dh_key.c b/app/openssl/crypto/dh/dh_key.c index e7db4403..89a74db4 100644 --- a/app/openssl/crypto/dh/dh_key.c +++ b/app/openssl/crypto/dh/dh_key.c @@ -73,11 +73,27 @@ static int dh_finish(DH *dh); int DH_generate_key(DH *dh) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dh->meth->flags & DH_FLAG_FIPS_METHOD) + && !(dh->flags & DH_FLAG_NON_FIPS_ALLOW)) + { + DHerr(DH_F_DH_GENERATE_KEY, DH_R_NON_FIPS_METHOD); + return 0; + } +#endif return dh->meth->generate_key(dh); } int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dh->meth->flags & DH_FLAG_FIPS_METHOD) + && !(dh->flags & DH_FLAG_NON_FIPS_ALLOW)) + { + DHerr(DH_F_DH_COMPUTE_KEY, DH_R_NON_FIPS_METHOD); + return 0; + } +#endif return dh->meth->compute_key(key, pub_key, dh); } @@ -138,8 +154,21 @@ static int generate_key(DH *dh) if (generate_new_key) { - l = dh->length ? dh->length : BN_num_bits(dh->p)-1; /* secret exponent length */ - if (!BN_rand(priv_key, l, 0, 0)) goto err; + if (dh->q) + { + do + { + if (!BN_rand_range(priv_key, dh->q)) + goto err; + } + while (BN_is_zero(priv_key) || BN_is_one(priv_key)); + } + else + { + /* secret exponent length */ + l = dh->length ? dh->length : BN_num_bits(dh->p)-1; + if (!BN_rand(priv_key, l, 0, 0)) goto err; + } } { diff --git a/app/openssl/crypto/dh/dh_lib.c b/app/openssl/crypto/dh/dh_lib.c index 7aef080e..00218f2b 100644 --- a/app/openssl/crypto/dh/dh_lib.c +++ b/app/openssl/crypto/dh/dh_lib.c @@ -64,6 +64,10 @@ #include #endif +#ifdef OPENSSL_FIPS +#include +#endif + const char DH_version[]="Diffie-Hellman" OPENSSL_VERSION_PTEXT; static const DH_METHOD *default_DH_method = NULL; @@ -76,7 +80,16 @@ void DH_set_default_method(const DH_METHOD *meth) const DH_METHOD *DH_get_default_method(void) { if(!default_DH_method) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_dh_openssl(); + else + return DH_OpenSSL(); +#else default_DH_method = DH_OpenSSL(); +#endif + } return default_DH_method; } @@ -156,7 +169,7 @@ DH *DH_new_method(ENGINE *engine) ret->counter = NULL; ret->method_mont_p=NULL; ret->references = 1; - ret->flags=ret->meth->flags; + ret->flags=ret->meth->flags & ~DH_FLAG_NON_FIPS_ALLOW; CRYPTO_new_ex_data(CRYPTO_EX_INDEX_DH, ret, &ret->ex_data); if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { diff --git a/app/openssl/crypto/dsa/dsa.h b/app/openssl/crypto/dsa/dsa.h index ac50a5c8..7531c653 100644 --- a/app/openssl/crypto/dsa/dsa.h +++ b/app/openssl/crypto/dsa/dsa.h @@ -96,6 +96,25 @@ * faster variable sliding window method to * be used for all exponents. */ +#define DSA_FLAG_NONCE_FROM_HASH 0x04 /* Causes the DSA nonce to be calculated + from SHA512(private_key + H(message) + + random). This strengthens DSA against a + weak PRNG. */ + +/* If this flag is set the DSA method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its reposibility + * to ensure the result is compliant. + */ + +#define DSA_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define DSA_FLAG_NON_FIPS_ALLOW 0x0400 #ifdef __cplusplus extern "C" { @@ -115,8 +134,9 @@ struct dsa_method { const char *name; DSA_SIG * (*dsa_do_sign)(const unsigned char *dgst, int dlen, DSA *dsa); - int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, - BIGNUM **rp); + int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen); int (*dsa_do_verify)(const unsigned char *dgst, int dgst_len, DSA_SIG *sig, DSA *dsa); int (*dsa_mod_exp)(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, @@ -272,6 +292,8 @@ void ERR_load_DSA_strings(void); #define DSA_F_DSAPARAMS_PRINT_FP 101 #define DSA_F_DSA_DO_SIGN 112 #define DSA_F_DSA_DO_VERIFY 113 +#define DSA_F_DSA_GENERATE_KEY 124 +#define DSA_F_DSA_GENERATE_PARAMETERS_EX 123 #define DSA_F_DSA_NEW_METHOD 103 #define DSA_F_DSA_PARAM_DECODE 119 #define DSA_F_DSA_PRINT_FP 105 @@ -282,6 +304,7 @@ void ERR_load_DSA_strings(void); #define DSA_F_DSA_SIGN 106 #define DSA_F_DSA_SIGN_SETUP 107 #define DSA_F_DSA_SIG_NEW 109 +#define DSA_F_DSA_SIG_PRINT 125 #define DSA_F_DSA_VERIFY 108 #define DSA_F_I2D_DSA_SIG 111 #define DSA_F_OLD_DSA_PRIV_DECODE 122 @@ -298,6 +321,9 @@ void ERR_load_DSA_strings(void); #define DSA_R_INVALID_DIGEST_TYPE 106 #define DSA_R_MISSING_PARAMETERS 101 #define DSA_R_MODULUS_TOO_LARGE 103 +#define DSA_R_NEED_NEW_SETUP_VALUES 110 +#define DSA_R_NONCE_CANNOT_BE_PRECOMPUTED 112 +#define DSA_R_NON_FIPS_DSA_METHOD 111 #define DSA_R_NO_PARAMETERS_SET 107 #define DSA_R_PARAMETER_ENCODING_ERROR 105 diff --git a/app/openssl/crypto/dsa/dsa_ameth.c b/app/openssl/crypto/dsa/dsa_ameth.c index 6413aae4..376156ec 100644 --- a/app/openssl/crypto/dsa/dsa_ameth.c +++ b/app/openssl/crypto/dsa/dsa_ameth.c @@ -542,6 +542,52 @@ static int old_dsa_priv_encode(const EVP_PKEY *pkey, unsigned char **pder) return i2d_DSAPrivateKey(pkey->pkey.dsa, pder); } +static int dsa_sig_print(BIO *bp, const X509_ALGOR *sigalg, + const ASN1_STRING *sig, + int indent, ASN1_PCTX *pctx) + { + DSA_SIG *dsa_sig; + const unsigned char *p; + if (!sig) + { + if (BIO_puts(bp, "\n") <= 0) + return 0; + else + return 1; + } + p = sig->data; + dsa_sig = d2i_DSA_SIG(NULL, &p, sig->length); + if (dsa_sig) + { + int rv = 0; + size_t buf_len = 0; + unsigned char *m=NULL; + update_buflen(dsa_sig->r, &buf_len); + update_buflen(dsa_sig->s, &buf_len); + m = OPENSSL_malloc(buf_len+10); + if (m == NULL) + { + DSAerr(DSA_F_DSA_SIG_PRINT,ERR_R_MALLOC_FAILURE); + goto err; + } + + if (BIO_write(bp, "\n", 1) != 1) + goto err; + + if (!ASN1_bn_print(bp,"r: ",dsa_sig->r,m,indent)) + goto err; + if (!ASN1_bn_print(bp,"s: ",dsa_sig->s,m,indent)) + goto err; + rv = 1; + err: + if (m) + OPENSSL_free(m); + DSA_SIG_free(dsa_sig); + return rv; + } + return X509_signature_dump(bp, sig, indent); + } + static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) { switch (op) @@ -647,6 +693,7 @@ const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[] = dsa_copy_parameters, dsa_cmp_parameters, dsa_param_print, + dsa_sig_print, int_dsa_free, dsa_pkey_ctrl, diff --git a/app/openssl/crypto/dsa/dsa_asn1.c b/app/openssl/crypto/dsa/dsa_asn1.c index c37460b2..60585343 100644 --- a/app/openssl/crypto/dsa/dsa_asn1.c +++ b/app/openssl/crypto/dsa/dsa_asn1.c @@ -61,6 +61,7 @@ #include #include #include +#include /* Override the default new methods */ static int sig_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, @@ -87,7 +88,7 @@ ASN1_SEQUENCE_cb(DSA_SIG, sig_cb) = { ASN1_SIMPLE(DSA_SIG, s, CBIGNUM) } ASN1_SEQUENCE_END_cb(DSA_SIG, DSA_SIG) -IMPLEMENT_ASN1_FUNCTIONS_const(DSA_SIG) +IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(DSA_SIG, DSA_SIG, DSA_SIG) /* Override the default free and new methods */ static int dsa_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, @@ -148,3 +149,40 @@ DSA *DSAparams_dup(DSA *dsa) { return ASN1_item_dup(ASN1_ITEM_rptr(DSAparams), dsa); } + +int DSA_sign(int type, const unsigned char *dgst, int dlen, unsigned char *sig, + unsigned int *siglen, DSA *dsa) + { + DSA_SIG *s; + RAND_seed(dgst, dlen); + s=DSA_do_sign(dgst,dlen,dsa); + if (s == NULL) + { + *siglen=0; + return(0); + } + *siglen=i2d_DSA_SIG(s,&sig); + DSA_SIG_free(s); + return(1); + } + +/* data has already been hashed (probably with SHA or SHA-1). */ +/* returns + * 1: correct signature + * 0: incorrect signature + * -1: error + */ +int DSA_verify(int type, const unsigned char *dgst, int dgst_len, + const unsigned char *sigbuf, int siglen, DSA *dsa) + { + DSA_SIG *s; + int ret=-1; + + s = DSA_SIG_new(); + if (s == NULL) return(ret); + if (d2i_DSA_SIG(&s,&sigbuf,siglen) == NULL) goto err; + ret=DSA_do_verify(dgst,dgst_len,s,dsa); +err: + DSA_SIG_free(s); + return(ret); + } diff --git a/app/openssl/crypto/dsa/dsa_err.c b/app/openssl/crypto/dsa/dsa_err.c index bba984e9..e6171cce 100644 --- a/app/openssl/crypto/dsa/dsa_err.c +++ b/app/openssl/crypto/dsa/dsa_err.c @@ -1,6 +1,6 @@ /* crypto/dsa/dsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -76,6 +76,8 @@ static ERR_STRING_DATA DSA_str_functs[]= {ERR_FUNC(DSA_F_DSAPARAMS_PRINT_FP), "DSAparams_print_fp"}, {ERR_FUNC(DSA_F_DSA_DO_SIGN), "DSA_do_sign"}, {ERR_FUNC(DSA_F_DSA_DO_VERIFY), "DSA_do_verify"}, +{ERR_FUNC(DSA_F_DSA_GENERATE_KEY), "DSA_generate_key"}, +{ERR_FUNC(DSA_F_DSA_GENERATE_PARAMETERS_EX), "DSA_generate_parameters_ex"}, {ERR_FUNC(DSA_F_DSA_NEW_METHOD), "DSA_new_method"}, {ERR_FUNC(DSA_F_DSA_PARAM_DECODE), "DSA_PARAM_DECODE"}, {ERR_FUNC(DSA_F_DSA_PRINT_FP), "DSA_print_fp"}, @@ -86,6 +88,7 @@ static ERR_STRING_DATA DSA_str_functs[]= {ERR_FUNC(DSA_F_DSA_SIGN), "DSA_sign"}, {ERR_FUNC(DSA_F_DSA_SIGN_SETUP), "DSA_sign_setup"}, {ERR_FUNC(DSA_F_DSA_SIG_NEW), "DSA_SIG_new"}, +{ERR_FUNC(DSA_F_DSA_SIG_PRINT), "DSA_SIG_PRINT"}, {ERR_FUNC(DSA_F_DSA_VERIFY), "DSA_verify"}, {ERR_FUNC(DSA_F_I2D_DSA_SIG), "i2d_DSA_SIG"}, {ERR_FUNC(DSA_F_OLD_DSA_PRIV_DECODE), "OLD_DSA_PRIV_DECODE"}, @@ -105,6 +108,9 @@ static ERR_STRING_DATA DSA_str_reasons[]= {ERR_REASON(DSA_R_INVALID_DIGEST_TYPE) ,"invalid digest type"}, {ERR_REASON(DSA_R_MISSING_PARAMETERS) ,"missing parameters"}, {ERR_REASON(DSA_R_MODULUS_TOO_LARGE) ,"modulus too large"}, +{ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES) ,"need new setup values"}, +{ERR_REASON(DSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"}, +{ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD) ,"non fips dsa method"}, {ERR_REASON(DSA_R_NO_PARAMETERS_SET) ,"no parameters set"}, {ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR),"parameter encoding error"}, {0,NULL} diff --git a/app/openssl/crypto/dsa/dsa_gen.c b/app/openssl/crypto/dsa/dsa_gen.c index cb0b4538..c398761d 100644 --- a/app/openssl/crypto/dsa/dsa_gen.c +++ b/app/openssl/crypto/dsa/dsa_gen.c @@ -81,13 +81,33 @@ #include #include "dsa_locl.h" +#ifdef OPENSSL_FIPS +#include +#endif + int DSA_generate_parameters_ex(DSA *ret, int bits, const unsigned char *seed_in, int seed_len, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(ret->meth->flags & DSA_FLAG_FIPS_METHOD) + && !(ret->flags & DSA_FLAG_NON_FIPS_ALLOW)) + { + DSAerr(DSA_F_DSA_GENERATE_PARAMETERS_EX, DSA_R_NON_FIPS_DSA_METHOD); + return 0; + } +#endif if(ret->meth->dsa_paramgen) return ret->meth->dsa_paramgen(ret, bits, seed_in, seed_len, counter_ret, h_ret, cb); +#ifdef OPENSSL_FIPS + else if (FIPS_mode()) + { + return FIPS_dsa_generate_parameters_ex(ret, bits, + seed_in, seed_len, + counter_ret, h_ret, cb); + } +#endif else { const EVP_MD *evpmd; @@ -105,12 +125,13 @@ int DSA_generate_parameters_ex(DSA *ret, int bits, } return dsa_builtin_paramgen(ret, bits, qbits, evpmd, - seed_in, seed_len, counter_ret, h_ret, cb); + seed_in, seed_len, NULL, counter_ret, h_ret, cb); } } int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len, + unsigned char *seed_out, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb) { int ok=0; @@ -201,8 +222,10 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, } /* step 2 */ - EVP_Digest(seed, qsize, md, NULL, evpmd, NULL); - EVP_Digest(buf, qsize, buf2, NULL, evpmd, NULL); + if (!EVP_Digest(seed, qsize, md, NULL, evpmd, NULL)) + goto err; + if (!EVP_Digest(buf, qsize, buf2, NULL, evpmd, NULL)) + goto err; for (i = 0; i < qsize; i++) md[i]^=buf2[i]; @@ -251,7 +274,9 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, break; } - EVP_Digest(buf, qsize, md ,NULL, evpmd, NULL); + if (!EVP_Digest(buf, qsize, md ,NULL, evpmd, + NULL)) + goto err; /* step 8 */ if (!BN_bin2bn(md, qsize, r0)) @@ -332,6 +357,8 @@ err: } if (counter_ret != NULL) *counter_ret=counter; if (h_ret != NULL) *h_ret=h; + if (seed_out) + memcpy(seed_out, seed, qsize); } if(ctx) { diff --git a/app/openssl/crypto/dsa/dsa_key.c b/app/openssl/crypto/dsa/dsa_key.c index c4aa86bc..9cf669b9 100644 --- a/app/openssl/crypto/dsa/dsa_key.c +++ b/app/openssl/crypto/dsa/dsa_key.c @@ -64,12 +64,28 @@ #include #include +#ifdef OPENSSL_FIPS +#include +#endif + static int dsa_builtin_keygen(DSA *dsa); int DSA_generate_key(DSA *dsa) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dsa->meth->flags & DSA_FLAG_FIPS_METHOD) + && !(dsa->flags & DSA_FLAG_NON_FIPS_ALLOW)) + { + DSAerr(DSA_F_DSA_GENERATE_KEY, DSA_R_NON_FIPS_DSA_METHOD); + return 0; + } +#endif if(dsa->meth->dsa_keygen) return dsa->meth->dsa_keygen(dsa); +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_dsa_generate_key(dsa); +#endif return dsa_builtin_keygen(dsa); } diff --git a/app/openssl/crypto/dsa/dsa_lib.c b/app/openssl/crypto/dsa/dsa_lib.c index e9b75902..96d8d0c4 100644 --- a/app/openssl/crypto/dsa/dsa_lib.c +++ b/app/openssl/crypto/dsa/dsa_lib.c @@ -70,6 +70,10 @@ #include #endif +#ifdef OPENSSL_FIPS +#include +#endif + const char DSA_version[]="DSA" OPENSSL_VERSION_PTEXT; static const DSA_METHOD *default_DSA_method = NULL; @@ -82,7 +86,16 @@ void DSA_set_default_method(const DSA_METHOD *meth) const DSA_METHOD *DSA_get_default_method(void) { if(!default_DSA_method) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_dsa_openssl(); + else + return DSA_OpenSSL(); +#else default_DSA_method = DSA_OpenSSL(); +#endif + } return default_DSA_method; } @@ -163,7 +176,7 @@ DSA *DSA_new_method(ENGINE *engine) ret->method_mont_p=NULL; ret->references=1; - ret->flags=ret->meth->flags; + ret->flags=ret->meth->flags & ~DSA_FLAG_NON_FIPS_ALLOW; CRYPTO_new_ex_data(CRYPTO_EX_INDEX_DSA, ret, &ret->ex_data); if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { @@ -276,7 +289,8 @@ void *DSA_get_ex_data(DSA *d, int idx) DH *DSA_dup_DH(const DSA *r) { /* DSA has p, q, g, optional pub_key, optional priv_key. - * DH has p, optional length, g, optional pub_key, optional priv_key. + * DH has p, optional length, g, optional pub_key, optional priv_key, + * optional q. */ DH *ret = NULL; @@ -290,7 +304,11 @@ DH *DSA_dup_DH(const DSA *r) if ((ret->p = BN_dup(r->p)) == NULL) goto err; if (r->q != NULL) + { ret->length = BN_num_bits(r->q); + if ((ret->q = BN_dup(r->q)) == NULL) + goto err; + } if (r->g != NULL) if ((ret->g = BN_dup(r->g)) == NULL) goto err; diff --git a/app/openssl/crypto/dsa/dsa_locl.h b/app/openssl/crypto/dsa/dsa_locl.h index 2b8cfee3..21e2e452 100644 --- a/app/openssl/crypto/dsa/dsa_locl.h +++ b/app/openssl/crypto/dsa/dsa_locl.h @@ -56,4 +56,5 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len, + unsigned char *seed_out, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); diff --git a/app/openssl/crypto/dsa/dsa_ossl.c b/app/openssl/crypto/dsa/dsa_ossl.c index a3ddd7d2..177fc545 100644 --- a/app/openssl/crypto/dsa/dsa_ossl.c +++ b/app/openssl/crypto/dsa/dsa_ossl.c @@ -67,7 +67,9 @@ #include static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa); -static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp); +static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen); static int dsa_do_verify(const unsigned char *dgst, int dgst_len, DSA_SIG *sig, DSA *dsa); static int dsa_init(DSA *dsa); @@ -136,6 +138,7 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) BN_CTX *ctx=NULL; int reason=ERR_R_BN_LIB; DSA_SIG *ret=NULL; + int noredo = 0; BN_init(&m); BN_init(&xr); @@ -150,10 +153,11 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) if (s == NULL) goto err; ctx=BN_CTX_new(); if (ctx == NULL) goto err; - +redo: if ((dsa->kinv == NULL) || (dsa->r == NULL)) { - if (!DSA_sign_setup(dsa,ctx,&kinv,&r)) goto err; + if (!dsa->meth->dsa_sign_setup(dsa,ctx,&kinv,&r,dgst,dlen)) + goto err; } else { @@ -161,6 +165,7 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) dsa->kinv=NULL; r=dsa->r; dsa->r=NULL; + noredo = 1; } @@ -181,6 +186,18 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) ret=DSA_SIG_new(); if (ret == NULL) goto err; + /* Redo if r or s is zero as required by FIPS 186-3: this is + * very unlikely. + */ + if (BN_is_zero(r) || BN_is_zero(s)) + { + if (noredo) + { + reason = DSA_R_NEED_NEW_SETUP_VALUES; + goto err; + } + goto redo; + } ret->r = r; ret->s = s; @@ -199,7 +216,9 @@ err: return(ret); } -static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp) +static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen) { BN_CTX *ctx; BIGNUM k,kq,*K,*kinv=NULL,*r=NULL; @@ -225,8 +244,21 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp) /* Get random k */ do - if (!BN_rand_range(&k, dsa->q)) goto err; - while (BN_is_zero(&k)); + { +#ifndef OPENSSL_NO_SHA512 + if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH) + { + /* If DSA_FLAG_NONCE_FROM_HASH is set then we calculate k from + * SHA512(private_key + H(message) + random). This protects the + * private key from a weak PRNG. */ + if (!BN_generate_dsa_nonce(&k, dsa->q, dsa->priv_key, dgst, + dlen, ctx)) + goto err; + } + else +#endif + if (!BN_rand_range(&k, dsa->q)) goto err; + } while (BN_is_zero(&k)); if ((dsa->flags & DSA_FLAG_NO_EXP_CONSTTIME) == 0) { BN_set_flags(&k, BN_FLG_CONSTTIME); diff --git a/app/openssl/crypto/dsa/dsa_pmeth.c b/app/openssl/crypto/dsa/dsa_pmeth.c index e2df54fe..715d8d67 100644 --- a/app/openssl/crypto/dsa/dsa_pmeth.c +++ b/app/openssl/crypto/dsa/dsa_pmeth.c @@ -189,7 +189,9 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) EVP_MD_type((const EVP_MD *)p2) != NID_dsa && EVP_MD_type((const EVP_MD *)p2) != NID_dsaWithSHA && EVP_MD_type((const EVP_MD *)p2) != NID_sha224 && - EVP_MD_type((const EVP_MD *)p2) != NID_sha256) + EVP_MD_type((const EVP_MD *)p2) != NID_sha256 && + EVP_MD_type((const EVP_MD *)p2) != NID_sha384 && + EVP_MD_type((const EVP_MD *)p2) != NID_sha512) { DSAerr(DSA_F_PKEY_DSA_CTRL, DSA_R_INVALID_DIGEST_TYPE); return 0; @@ -253,7 +255,7 @@ static int pkey_dsa_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) if (!dsa) return 0; ret = dsa_builtin_paramgen(dsa, dctx->nbits, dctx->qbits, dctx->pmd, - NULL, 0, NULL, NULL, pcb); + NULL, 0, NULL, NULL, NULL, pcb); if (ret) EVP_PKEY_assign_DSA(pkey, dsa); else diff --git a/app/openssl/crypto/dsa/dsa_sign.c b/app/openssl/crypto/dsa/dsa_sign.c index 17555e58..8ace300a 100644 --- a/app/openssl/crypto/dsa/dsa_sign.c +++ b/app/openssl/crypto/dsa/dsa_sign.c @@ -61,30 +61,61 @@ #include "cryptlib.h" #include #include +#include DSA_SIG * DSA_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dsa->meth->flags & DSA_FLAG_FIPS_METHOD) + && !(dsa->flags & DSA_FLAG_NON_FIPS_ALLOW)) + { + DSAerr(DSA_F_DSA_DO_SIGN, DSA_R_NON_FIPS_DSA_METHOD); + return NULL; + } +#endif return dsa->meth->dsa_do_sign(dgst, dlen, dsa); } -int DSA_sign(int type, const unsigned char *dgst, int dlen, unsigned char *sig, - unsigned int *siglen, DSA *dsa) +int DSA_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp) { - DSA_SIG *s; - RAND_seed(dgst, dlen); - s=DSA_do_sign(dgst,dlen,dsa); - if (s == NULL) +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dsa->meth->flags & DSA_FLAG_FIPS_METHOD) + && !(dsa->flags & DSA_FLAG_NON_FIPS_ALLOW)) { - *siglen=0; - return(0); + DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_NON_FIPS_DSA_METHOD); + return 0; } - *siglen=i2d_DSA_SIG(s,&sig); - DSA_SIG_free(s); - return(1); +#endif + if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH) + { + /* You cannot precompute the DSA nonce if it is required to + * depend on the message. */ + DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_NONCE_CANNOT_BE_PRECOMPUTED); + return 0; + } + return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp, NULL, 0); } -int DSA_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp) +DSA_SIG *DSA_SIG_new(void) { - return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp); + DSA_SIG *sig; + sig = OPENSSL_malloc(sizeof(DSA_SIG)); + if (!sig) + return NULL; + sig->r = NULL; + sig->s = NULL; + return sig; + } + +void DSA_SIG_free(DSA_SIG *sig) + { + if (sig) + { + if (sig->r) + BN_free(sig->r); + if (sig->s) + BN_free(sig->s); + OPENSSL_free(sig); + } } diff --git a/app/openssl/crypto/dsa/dsa_vrf.c b/app/openssl/crypto/dsa/dsa_vrf.c index 226a75ff..674cb5fa 100644 --- a/app/openssl/crypto/dsa/dsa_vrf.c +++ b/app/openssl/crypto/dsa/dsa_vrf.c @@ -64,26 +64,13 @@ int DSA_do_verify(const unsigned char *dgst, int dgst_len, DSA_SIG *sig, DSA *dsa) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(dsa->meth->flags & DSA_FLAG_FIPS_METHOD) + && !(dsa->flags & DSA_FLAG_NON_FIPS_ALLOW)) + { + DSAerr(DSA_F_DSA_DO_VERIFY, DSA_R_NON_FIPS_DSA_METHOD); + return -1; + } +#endif return dsa->meth->dsa_do_verify(dgst, dgst_len, sig, dsa); } - -/* data has already been hashed (probably with SHA or SHA-1). */ -/* returns - * 1: correct signature - * 0: incorrect signature - * -1: error - */ -int DSA_verify(int type, const unsigned char *dgst, int dgst_len, - const unsigned char *sigbuf, int siglen, DSA *dsa) - { - DSA_SIG *s; - int ret=-1; - - s = DSA_SIG_new(); - if (s == NULL) return(ret); - if (d2i_DSA_SIG(&s,&sigbuf,siglen) == NULL) goto err; - ret=DSA_do_verify(dgst,dgst_len,s,dsa); -err: - DSA_SIG_free(s); - return(ret); - } diff --git a/app/openssl/crypto/dso/dso_dlfcn.c b/app/openssl/crypto/dso/dso_dlfcn.c index c2bc6176..5f225480 100644 --- a/app/openssl/crypto/dso/dso_dlfcn.c +++ b/app/openssl/crypto/dso/dso_dlfcn.c @@ -86,7 +86,8 @@ DSO_METHOD *DSO_METHOD_dlfcn(void) # if defined(_AIX) || defined(__CYGWIN__) || \ defined(__SCO_VERSION__) || defined(_SCO_ELF) || \ (defined(__osf__) && !defined(RTLD_NEXT)) || \ - (defined(__OpenBSD__) && !defined(RTLD_SELF)) + (defined(__OpenBSD__) && !defined(RTLD_SELF)) || \ + defined(__ANDROID__) # undef HAVE_DLINFO # endif #endif diff --git a/app/openssl/crypto/dso/dso_lib.c b/app/openssl/crypto/dso/dso_lib.c index 8a15b794..78015298 100644 --- a/app/openssl/crypto/dso/dso_lib.c +++ b/app/openssl/crypto/dso/dso_lib.c @@ -237,11 +237,19 @@ DSO *DSO_load(DSO *dso, const char *filename, DSO_METHOD *meth, int flags) if(ret->meth->dso_load == NULL) { DSOerr(DSO_F_DSO_LOAD,DSO_R_UNSUPPORTED); + /* Make sure we unset the filename on failure, because we use + * this to determine when the DSO has been loaded above. */ + OPENSSL_free(ret->filename); + ret->filename = NULL; goto err; } if(!ret->meth->dso_load(ret)) { DSOerr(DSO_F_DSO_LOAD,DSO_R_LOAD_FAILED); + /* Make sure we unset the filename on failure, because we use + * this to determine when the DSO has been loaded above. */ + OPENSSL_free(ret->filename); + ret->filename = NULL; goto err; } /* Load succeeded */ diff --git a/app/openssl/crypto/ec/ec.h b/app/openssl/crypto/ec/ec.h index ee707813..d008a0da 100644 --- a/app/openssl/crypto/ec/ec.h +++ b/app/openssl/crypto/ec/ec.h @@ -151,7 +151,24 @@ const EC_METHOD *EC_GFp_mont_method(void); */ const EC_METHOD *EC_GFp_nist_method(void); +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +/** Returns 64-bit optimized methods for nistp224 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp224_method(void); + +/** Returns 64-bit optimized methods for nistp256 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp256_method(void); + +/** Returns 64-bit optimized methods for nistp521 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp521_method(void); +#endif +#ifndef OPENSSL_NO_EC2M /********************************************************************/ /* EC_METHOD for curves over GF(2^m) */ /********************************************************************/ @@ -161,6 +178,8 @@ const EC_METHOD *EC_GFp_nist_method(void); */ const EC_METHOD *EC_GF2m_simple_method(void); +#endif + /********************************************************************/ /* EC_GROUP functions */ @@ -255,10 +274,10 @@ int EC_GROUP_get_curve_name(const EC_GROUP *group); void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag); int EC_GROUP_get_asn1_flag(const EC_GROUP *group); -void EC_GROUP_set_point_conversion_form(EC_GROUP *, point_conversion_form_t); +void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form); point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *); -unsigned char *EC_GROUP_get0_seed(const EC_GROUP *); +unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x); size_t EC_GROUP_get_seed_len(const EC_GROUP *); size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len); @@ -282,6 +301,7 @@ int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, co */ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); +#ifndef OPENSSL_NO_EC2M /** Sets the parameter of a ec over GF2m defined by y^2 + x*y = x^3 + a*x^2 + b * \param group EC_GROUP object * \param p BIGNUM with the polynomial defining the underlying field @@ -301,7 +321,7 @@ int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, c * \return 1 on success and 0 if an error occured */ int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); - +#endif /** Returns the number of bits needed to represent a field element * \param group EC_GROUP object * \return number of bits needed to represent a field element @@ -342,7 +362,7 @@ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx); * \return newly created EC_GROUP object with the specified parameters */ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); - +#ifndef OPENSSL_NO_EC2M /** Creates a new EC_GROUP object with the specified parameters defined * over GF2m (defined by the equation y^2 + x*y = x^3 + a*x^2 + b) * \param p BIGNUM with the polynomial defining the underlying field @@ -352,7 +372,7 @@ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM * \return newly created EC_GROUP object with the specified parameters */ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); - +#endif /** Creates a EC_GROUP object with a curve specified by a NID * \param nid NID of the OID of the curve name * \return newly created EC_GROUP object with specified curve or NULL @@ -481,7 +501,7 @@ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, */ int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, const BIGNUM *x, int y_bit, BN_CTX *ctx); - +#ifndef OPENSSL_NO_EC2M /** Sets the affine coordinates of a EC_POINT over GF2m * \param group underlying EC_GROUP object * \param p EC_POINT object @@ -514,7 +534,7 @@ int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, */ int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, const BIGNUM *x, int y_bit, BN_CTX *ctx); - +#endif /** Encodes a EC_POINT object to a octet string * \param group underlying EC_GROUP object * \param p EC_POINT object @@ -606,8 +626,8 @@ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *c */ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); -int EC_POINT_make_affine(const EC_GROUP *, EC_POINT *, BN_CTX *); -int EC_POINTs_make_affine(const EC_GROUP *, size_t num, EC_POINT *[], BN_CTX *); +int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx); +int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx); /** Computes r = generator * n sum_{i=0}^num p[i] * m[i] * \param group underlying EC_GROUP object @@ -653,9 +673,11 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group); /* EC_GROUP_get_basis_type() returns the NID of the basis type * used to represent the field elements */ int EC_GROUP_get_basis_type(const EC_GROUP *); +#ifndef OPENSSL_NO_EC2M int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, unsigned int *k2, unsigned int *k3); +#endif #define OPENSSL_EC_NAMED_CURVE 0x001 @@ -689,11 +711,21 @@ typedef struct ec_key_st EC_KEY; #define EC_PKEY_NO_PARAMETERS 0x001 #define EC_PKEY_NO_PUBKEY 0x002 +/* some values for the flags field */ +#define EC_FLAG_NON_FIPS_ALLOW 0x1 +#define EC_FLAG_FIPS_CHECKED 0x2 + /** Creates a new EC_KEY object. * \return EC_KEY object or NULL if an error occurred. */ EC_KEY *EC_KEY_new(void); +int EC_KEY_get_flags(const EC_KEY *key); + +void EC_KEY_set_flags(EC_KEY *key, int flags); + +void EC_KEY_clear_flags(EC_KEY *key, int flags); + /** Creates a new EC_KEY object using a named curve as underlying * EC_GROUP object. * \param nid NID of the named curve. @@ -768,16 +800,35 @@ const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key); int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub); unsigned EC_KEY_get_enc_flags(const EC_KEY *key); -void EC_KEY_set_enc_flags(EC_KEY *, unsigned int); -point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *); -void EC_KEY_set_conv_form(EC_KEY *, point_conversion_form_t); +void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags); +point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); +void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); /* functions to set/get method specific data */ -void *EC_KEY_get_key_method_data(EC_KEY *, +void *EC_KEY_get_key_method_data(EC_KEY *key, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); -void EC_KEY_insert_key_method_data(EC_KEY *, void *data, +/** Sets the key method data of an EC_KEY object, if none has yet been set. + * \param key EC_KEY object + * \param data opaque data to install. + * \param dup_func a function that duplicates |data|. + * \param free_func a function that frees |data|. + * \param clear_free_func a function that wipes and frees |data|. + * \return the previously set data pointer, or NULL if |data| was inserted. + */ +void *EC_KEY_insert_key_method_data(EC_KEY *key, void *data, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); /* wrapper functions for the underlying EC_GROUP object */ -void EC_KEY_set_asn1_flag(EC_KEY *, int); +void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag); + +/** Sets whether ECDSA operations with the given key will calculate their k + * value from SHA512(private_key + message + random) in order to protect + * against a weak PRNG. + * \param on Whether to calculate k from a hash or not + */ +void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on); + +/** Returns the value of nonce_from_hash + */ +int EC_KEY_get_nonce_from_hash(const EC_KEY *key); /** Creates a table of pre-computed multiples of the generator to * accelerate further EC_KEY operations. @@ -799,6 +850,15 @@ int EC_KEY_generate_key(EC_KEY *key); */ int EC_KEY_check_key(const EC_KEY *key); +/** Sets a public key from affine coordindates performing + * neccessary NIST PKV tests. + * \param key the EC_KEY object + * \param x public key x coordinate + * \param y public key y coordinate + * \return 1 on success and 0 otherwise. + */ +int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y); + /********************************************************************/ /* de- and encoding functions for SEC1 ECPrivateKey */ @@ -926,6 +986,7 @@ void ERR_load_EC_strings(void); /* Error codes for the EC functions. */ /* Function codes. */ +#define EC_F_BN_TO_FELEM 224 #define EC_F_COMPUTE_WNAF 143 #define EC_F_D2I_ECPARAMETERS 144 #define EC_F_D2I_ECPKPARAMETERS 145 @@ -968,6 +1029,15 @@ void ERR_load_EC_strings(void); #define EC_F_EC_GFP_MONT_FIELD_SQR 132 #define EC_F_EC_GFP_MONT_GROUP_SET_CURVE 189 #define EC_F_EC_GFP_MONT_GROUP_SET_CURVE_GFP 135 +#define EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE 225 +#define EC_F_EC_GFP_NISTP224_POINTS_MUL 228 +#define EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES 226 +#define EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE 230 +#define EC_F_EC_GFP_NISTP256_POINTS_MUL 231 +#define EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES 232 +#define EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE 233 +#define EC_F_EC_GFP_NISTP521_POINTS_MUL 234 +#define EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES 235 #define EC_F_EC_GFP_NIST_FIELD_MUL 200 #define EC_F_EC_GFP_NIST_FIELD_SQR 201 #define EC_F_EC_GFP_NIST_GROUP_SET_CURVE 202 @@ -1010,6 +1080,7 @@ void ERR_load_EC_strings(void); #define EC_F_EC_KEY_NEW 182 #define EC_F_EC_KEY_PRINT 180 #define EC_F_EC_KEY_PRINT_FP 181 +#define EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES 229 #define EC_F_EC_POINTS_MAKE_AFFINE 136 #define EC_F_EC_POINT_ADD 112 #define EC_F_EC_POINT_CMP 113 @@ -1040,6 +1111,9 @@ void ERR_load_EC_strings(void); #define EC_F_I2D_ECPKPARAMETERS 191 #define EC_F_I2D_ECPRIVATEKEY 192 #define EC_F_I2O_ECPUBLICKEY 151 +#define EC_F_NISTP224_PRE_COMP_NEW 227 +#define EC_F_NISTP256_PRE_COMP_NEW 236 +#define EC_F_NISTP521_PRE_COMP_NEW 237 #define EC_F_O2I_ECPUBLICKEY 152 #define EC_F_OLD_EC_PRIV_DECODE 222 #define EC_F_PKEY_EC_CTRL 197 @@ -1052,12 +1126,15 @@ void ERR_load_EC_strings(void); /* Reason codes. */ #define EC_R_ASN1_ERROR 115 #define EC_R_ASN1_UNKNOWN_FIELD 116 +#define EC_R_BIGNUM_OUT_OF_RANGE 144 #define EC_R_BUFFER_TOO_SMALL 100 +#define EC_R_COORDINATES_OUT_OF_RANGE 146 #define EC_R_D2I_ECPKPARAMETERS_FAILURE 117 #define EC_R_DECODE_ERROR 142 #define EC_R_DISCRIMINANT_IS_ZERO 118 #define EC_R_EC_GROUP_NEW_BY_NAME_FAILURE 119 #define EC_R_FIELD_TOO_LARGE 143 +#define EC_R_GF2M_NOT_SUPPORTED 147 #define EC_R_GROUP2PKPARAMETERS_FAILURE 120 #define EC_R_I2D_ECPKPARAMETERS_FAILURE 121 #define EC_R_INCOMPATIBLE_OBJECTS 101 @@ -1092,6 +1169,7 @@ void ERR_load_EC_strings(void); #define EC_R_UNKNOWN_GROUP 129 #define EC_R_UNKNOWN_ORDER 114 #define EC_R_UNSUPPORTED_FIELD 131 +#define EC_R_WRONG_CURVE_PARAMETERS 145 #define EC_R_WRONG_ORDER 130 #ifdef __cplusplus diff --git a/app/openssl/crypto/ec/ec2_mult.c b/app/openssl/crypto/ec/ec2_mult.c index e12b9b28..1c575dc4 100644 --- a/app/openssl/crypto/ec/ec2_mult.c +++ b/app/openssl/crypto/ec/ec2_mult.c @@ -71,6 +71,8 @@ #include "ec_lcl.h" +#ifndef OPENSSL_NO_EC2M + /* Compute the x-coordinate x/z for the point 2*(x/z) in Montgomery projective * coordinates. @@ -206,11 +208,15 @@ static int gf2m_Mxy(const EC_GROUP *group, const BIGNUM *x, const BIGNUM *y, BIG return ret; } + /* Computes scalar*point and stores the result in r. * point can not equal r. - * Uses algorithm 2P of + * Uses a modified algorithm 2P of * Lopez, J. and Dahab, R. "Fast multiplication on elliptic curves over * GF(2^m) without precomputation" (CHES '99, LNCS 1717). + * + * To protect against side-channel attack the function uses constant time swap, + * avoiding conditional branches. */ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, const EC_POINT *point, BN_CTX *ctx) @@ -244,6 +250,11 @@ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, EC_POINT *r, x2 = &r->X; z2 = &r->Y; + bn_wexpand(x1, group->field.top); + bn_wexpand(z1, group->field.top); + bn_wexpand(x2, group->field.top); + bn_wexpand(z2, group->field.top); + if (!BN_GF2m_mod_arr(x1, &point->X, group->poly)) goto err; /* x1 = x */ if (!BN_one(z1)) goto err; /* z1 = 1 */ if (!group->meth->field_sqr(group, z2, x1, ctx)) goto err; /* z2 = x1^2 = x^2 */ @@ -268,16 +279,12 @@ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, EC_POINT *r, word = scalar->d[i]; while (mask) { - if (word & mask) - { - if (!gf2m_Madd(group, &point->X, x1, z1, x2, z2, ctx)) goto err; - if (!gf2m_Mdouble(group, x2, z2, ctx)) goto err; - } - else - { - if (!gf2m_Madd(group, &point->X, x2, z2, x1, z1, ctx)) goto err; - if (!gf2m_Mdouble(group, x1, z1, ctx)) goto err; - } + BN_consttime_swap(word & mask, x1, x2, group->field.top); + BN_consttime_swap(word & mask, z1, z2, group->field.top); + if (!gf2m_Madd(group, &point->X, x2, z2, x1, z1, ctx)) goto err; + if (!gf2m_Mdouble(group, x1, z1, ctx)) goto err; + BN_consttime_swap(word & mask, x1, x2, group->field.top); + BN_consttime_swap(word & mask, z1, z2, group->field.top); mask >>= 1; } mask = BN_TBIT; @@ -384,3 +391,5 @@ int ec_GF2m_have_precompute_mult(const EC_GROUP *group) { return ec_wNAF_have_precompute_mult(group); } + +#endif diff --git a/app/openssl/crypto/ec/ec2_oct.c b/app/openssl/crypto/ec/ec2_oct.c new file mode 100644 index 00000000..f1d75e5d --- /dev/null +++ b/app/openssl/crypto/ec/ec2_oct.c @@ -0,0 +1,407 @@ +/* crypto/ec/ec2_oct.c */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * The Elliptic Curve Public-Key Crypto Library (ECC Code) included + * herein is developed by SUN MICROSYSTEMS, INC., and is contributed + * to the OpenSSL project. + * + * The ECC Code is licensed pursuant to the OpenSSL open source + * license provided below. + * + * The software is originally written by Sheueling Chang Shantz and + * Douglas Stebila of Sun Microsystems Laboratories. + * + */ +/* ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include + +#include "ec_lcl.h" + +#ifndef OPENSSL_NO_EC2M + +/* Calculates and sets the affine coordinates of an EC_POINT from the given + * compressed coordinates. Uses algorithm 2.3.4 of SEC 1. + * Note that the simple implementation only uses affine coordinates. + * + * The method is from the following publication: + * + * Harper, Menezes, Vanstone: + * "Public-Key Cryptosystems with Very Small Key Lengths", + * EUROCRYPT '92, Springer-Verlag LNCS 658, + * published February 1993 + * + * US Patents 6,141,420 and 6,618,483 (Vanstone, Mullin, Agnew) describe + * the same method, but claim no priority date earlier than July 29, 1994 + * (and additionally fail to cite the EUROCRYPT '92 publication as prior art). + */ +int ec_GF2m_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point, + const BIGNUM *x_, int y_bit, BN_CTX *ctx) + { + BN_CTX *new_ctx = NULL; + BIGNUM *tmp, *x, *y, *z; + int ret = 0, z0; + + /* clear error queue */ + ERR_clear_error(); + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + y_bit = (y_bit != 0) ? 1 : 0; + + BN_CTX_start(ctx); + tmp = BN_CTX_get(ctx); + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + z = BN_CTX_get(ctx); + if (z == NULL) goto err; + + if (!BN_GF2m_mod_arr(x, x_, group->poly)) goto err; + if (BN_is_zero(x)) + { + if (!BN_GF2m_mod_sqrt_arr(y, &group->b, group->poly, ctx)) goto err; + } + else + { + if (!group->meth->field_sqr(group, tmp, x, ctx)) goto err; + if (!group->meth->field_div(group, tmp, &group->b, tmp, ctx)) goto err; + if (!BN_GF2m_add(tmp, &group->a, tmp)) goto err; + if (!BN_GF2m_add(tmp, x, tmp)) goto err; + if (!BN_GF2m_mod_solve_quad_arr(z, tmp, group->poly, ctx)) + { + unsigned long err = ERR_peek_last_error(); + + if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NO_SOLUTION) + { + ERR_clear_error(); + ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); + } + else + ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB); + goto err; + } + z0 = (BN_is_odd(z)) ? 1 : 0; + if (!group->meth->field_mul(group, y, x, z, ctx)) goto err; + if (z0 != y_bit) + { + if (!BN_GF2m_add(y, y, x)) goto err; + } + } + + if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + } + + +/* Converts an EC_POINT to an octet string. + * If buf is NULL, the encoded length will be returned. + * If the length len of buf is smaller than required an error will be returned. + */ +size_t ec_GF2m_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, + unsigned char *buf, size_t len, BN_CTX *ctx) + { + size_t ret; + BN_CTX *new_ctx = NULL; + int used_ctx = 0; + BIGNUM *x, *y, *yxi; + size_t field_len, i, skip; + + if ((form != POINT_CONVERSION_COMPRESSED) + && (form != POINT_CONVERSION_UNCOMPRESSED) + && (form != POINT_CONVERSION_HYBRID)) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_INVALID_FORM); + goto err; + } + + if (EC_POINT_is_at_infinity(group, point)) + { + /* encodes to a single 0 octet */ + if (buf != NULL) + { + if (len < 1) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); + return 0; + } + buf[0] = 0; + } + return 1; + } + + + /* ret := required output buffer length */ + field_len = (EC_GROUP_get_degree(group) + 7) / 8; + ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; + + /* if 'buf' is NULL, just return required length */ + if (buf != NULL) + { + if (len < ret) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); + goto err; + } + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + BN_CTX_start(ctx); + used_ctx = 1; + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + yxi = BN_CTX_get(ctx); + if (yxi == NULL) goto err; + + if (!EC_POINT_get_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; + + buf[0] = form; + if ((form != POINT_CONVERSION_UNCOMPRESSED) && !BN_is_zero(x)) + { + if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err; + if (BN_is_odd(yxi)) buf[0]++; + } + + i = 1; + + skip = field_len - BN_num_bytes(x); + if (skip > field_len) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + while (skip > 0) + { + buf[i++] = 0; + skip--; + } + skip = BN_bn2bin(x, buf + i); + i += skip; + if (i != 1 + field_len) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + + if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID) + { + skip = field_len - BN_num_bytes(y); + if (skip > field_len) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + while (skip > 0) + { + buf[i++] = 0; + skip--; + } + skip = BN_bn2bin(y, buf + i); + i += skip; + } + + if (i != ret) + { + ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + } + + if (used_ctx) + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + + err: + if (used_ctx) + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return 0; + } + + +/* Converts an octet string representation to an EC_POINT. + * Note that the simple implementation only uses affine coordinates. + */ +int ec_GF2m_simple_oct2point(const EC_GROUP *group, EC_POINT *point, + const unsigned char *buf, size_t len, BN_CTX *ctx) + { + point_conversion_form_t form; + int y_bit; + BN_CTX *new_ctx = NULL; + BIGNUM *x, *y, *yxi; + size_t field_len, enc_len; + int ret = 0; + + if (len == 0) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL); + return 0; + } + form = buf[0]; + y_bit = form & 1; + form = form & ~1U; + if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED) + && (form != POINT_CONVERSION_UNCOMPRESSED) + && (form != POINT_CONVERSION_HYBRID)) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + if (form == 0) + { + if (len != 1) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + return EC_POINT_set_to_infinity(group, point); + } + + field_len = (EC_GROUP_get_degree(group) + 7) / 8; + enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; + + if (len != enc_len) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + BN_CTX_start(ctx); + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + yxi = BN_CTX_get(ctx); + if (yxi == NULL) goto err; + + if (!BN_bin2bn(buf + 1, field_len, x)) goto err; + if (BN_ucmp(x, &group->field) >= 0) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + + if (form == POINT_CONVERSION_COMPRESSED) + { + if (!EC_POINT_set_compressed_coordinates_GF2m(group, point, x, y_bit, ctx)) goto err; + } + else + { + if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err; + if (BN_ucmp(y, &group->field) >= 0) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + if (form == POINT_CONVERSION_HYBRID) + { + if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err; + if (y_bit != BN_is_odd(yxi)) + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + } + + if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; + } + + if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */ + { + ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); + goto err; + } + + ret = 1; + + err: + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + } +#endif diff --git a/app/openssl/crypto/ec/ec2_smpl.c b/app/openssl/crypto/ec/ec2_smpl.c index af94458c..e0e59c7d 100644 --- a/app/openssl/crypto/ec/ec2_smpl.c +++ b/app/openssl/crypto/ec/ec2_smpl.c @@ -71,10 +71,20 @@ #include "ec_lcl.h" +#ifndef OPENSSL_NO_EC2M + +#ifdef OPENSSL_FIPS +#include +#endif + const EC_METHOD *EC_GF2m_simple_method(void) { +#ifdef OPENSSL_FIPS + return fips_ec_gf2m_simple_method(); +#else static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, NID_X9_62_characteristic_two_field, ec_GF2m_simple_group_init, ec_GF2m_simple_group_finish, @@ -93,9 +103,7 @@ const EC_METHOD *EC_GF2m_simple_method(void) 0 /* get_Jprojective_coordinates_GFp */, ec_GF2m_simple_point_set_affine_coordinates, ec_GF2m_simple_point_get_affine_coordinates, - ec_GF2m_simple_set_compressed_coordinates, - ec_GF2m_simple_point2oct, - ec_GF2m_simple_oct2point, + 0,0,0, ec_GF2m_simple_add, ec_GF2m_simple_dbl, ec_GF2m_simple_invert, @@ -118,6 +126,7 @@ const EC_METHOD *EC_GF2m_simple_method(void) 0 /* field_set_to_one */ }; return &ret; +#endif } @@ -405,340 +414,6 @@ int ec_GF2m_simple_point_get_affine_coordinates(const EC_GROUP *group, const EC_ return ret; } - -/* Calculates and sets the affine coordinates of an EC_POINT from the given - * compressed coordinates. Uses algorithm 2.3.4 of SEC 1. - * Note that the simple implementation only uses affine coordinates. - * - * The method is from the following publication: - * - * Harper, Menezes, Vanstone: - * "Public-Key Cryptosystems with Very Small Key Lengths", - * EUROCRYPT '92, Springer-Verlag LNCS 658, - * published February 1993 - * - * US Patents 6,141,420 and 6,618,483 (Vanstone, Mullin, Agnew) describe - * the same method, but claim no priority date earlier than July 29, 1994 - * (and additionally fail to cite the EUROCRYPT '92 publication as prior art). - */ -int ec_GF2m_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point, - const BIGNUM *x_, int y_bit, BN_CTX *ctx) - { - BN_CTX *new_ctx = NULL; - BIGNUM *tmp, *x, *y, *z; - int ret = 0, z0; - - /* clear error queue */ - ERR_clear_error(); - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - y_bit = (y_bit != 0) ? 1 : 0; - - BN_CTX_start(ctx); - tmp = BN_CTX_get(ctx); - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - z = BN_CTX_get(ctx); - if (z == NULL) goto err; - - if (!BN_GF2m_mod_arr(x, x_, group->poly)) goto err; - if (BN_is_zero(x)) - { - if (!BN_GF2m_mod_sqrt_arr(y, &group->b, group->poly, ctx)) goto err; - } - else - { - if (!group->meth->field_sqr(group, tmp, x, ctx)) goto err; - if (!group->meth->field_div(group, tmp, &group->b, tmp, ctx)) goto err; - if (!BN_GF2m_add(tmp, &group->a, tmp)) goto err; - if (!BN_GF2m_add(tmp, x, tmp)) goto err; - if (!BN_GF2m_mod_solve_quad_arr(z, tmp, group->poly, ctx)) - { - unsigned long err = ERR_peek_last_error(); - - if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NO_SOLUTION) - { - ERR_clear_error(); - ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); - } - else - ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB); - goto err; - } - z0 = (BN_is_odd(z)) ? 1 : 0; - if (!group->meth->field_mul(group, y, x, z, ctx)) goto err; - if (z0 != y_bit) - { - if (!BN_GF2m_add(y, y, x)) goto err; - } - } - - if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; - - ret = 1; - - err: - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - } - - -/* Converts an EC_POINT to an octet string. - * If buf is NULL, the encoded length will be returned. - * If the length len of buf is smaller than required an error will be returned. - */ -size_t ec_GF2m_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, - unsigned char *buf, size_t len, BN_CTX *ctx) - { - size_t ret; - BN_CTX *new_ctx = NULL; - int used_ctx = 0; - BIGNUM *x, *y, *yxi; - size_t field_len, i, skip; - - if ((form != POINT_CONVERSION_COMPRESSED) - && (form != POINT_CONVERSION_UNCOMPRESSED) - && (form != POINT_CONVERSION_HYBRID)) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_INVALID_FORM); - goto err; - } - - if (EC_POINT_is_at_infinity(group, point)) - { - /* encodes to a single 0 octet */ - if (buf != NULL) - { - if (len < 1) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); - return 0; - } - buf[0] = 0; - } - return 1; - } - - - /* ret := required output buffer length */ - field_len = (EC_GROUP_get_degree(group) + 7) / 8; - ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; - - /* if 'buf' is NULL, just return required length */ - if (buf != NULL) - { - if (len < ret) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); - goto err; - } - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - BN_CTX_start(ctx); - used_ctx = 1; - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - yxi = BN_CTX_get(ctx); - if (yxi == NULL) goto err; - - if (!EC_POINT_get_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; - - buf[0] = form; - if ((form != POINT_CONVERSION_UNCOMPRESSED) && !BN_is_zero(x)) - { - if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err; - if (BN_is_odd(yxi)) buf[0]++; - } - - i = 1; - - skip = field_len - BN_num_bytes(x); - if (skip > field_len) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - while (skip > 0) - { - buf[i++] = 0; - skip--; - } - skip = BN_bn2bin(x, buf + i); - i += skip; - if (i != 1 + field_len) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - - if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID) - { - skip = field_len - BN_num_bytes(y); - if (skip > field_len) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - while (skip > 0) - { - buf[i++] = 0; - skip--; - } - skip = BN_bn2bin(y, buf + i); - i += skip; - } - - if (i != ret) - { - ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - } - - if (used_ctx) - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - - err: - if (used_ctx) - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return 0; - } - - -/* Converts an octet string representation to an EC_POINT. - * Note that the simple implementation only uses affine coordinates. - */ -int ec_GF2m_simple_oct2point(const EC_GROUP *group, EC_POINT *point, - const unsigned char *buf, size_t len, BN_CTX *ctx) - { - point_conversion_form_t form; - int y_bit; - BN_CTX *new_ctx = NULL; - BIGNUM *x, *y, *yxi; - size_t field_len, enc_len; - int ret = 0; - - if (len == 0) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL); - return 0; - } - form = buf[0]; - y_bit = form & 1; - form = form & ~1U; - if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED) - && (form != POINT_CONVERSION_UNCOMPRESSED) - && (form != POINT_CONVERSION_HYBRID)) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - if (form == 0) - { - if (len != 1) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - return EC_POINT_set_to_infinity(group, point); - } - - field_len = (EC_GROUP_get_degree(group) + 7) / 8; - enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; - - if (len != enc_len) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - BN_CTX_start(ctx); - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - yxi = BN_CTX_get(ctx); - if (yxi == NULL) goto err; - - if (!BN_bin2bn(buf + 1, field_len, x)) goto err; - if (BN_ucmp(x, &group->field) >= 0) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - - if (form == POINT_CONVERSION_COMPRESSED) - { - if (!EC_POINT_set_compressed_coordinates_GF2m(group, point, x, y_bit, ctx)) goto err; - } - else - { - if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err; - if (BN_ucmp(y, &group->field) >= 0) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - if (form == POINT_CONVERSION_HYBRID) - { - if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err; - if (y_bit != BN_is_odd(yxi)) - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - } - - if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err; - } - - if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */ - { - ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); - goto err; - } - - ret = 1; - - err: - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - } - - /* Computes a + b and stores the result in r. r could be a or b, a could be b. * Uses algorithm A.10.2 of IEEE P1363. */ @@ -887,7 +562,7 @@ int ec_GF2m_simple_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_ field_sqr = group->meth->field_sqr; /* only support affine coordinates */ - if (!point->Z_is_one) goto err; + if (!point->Z_is_one) return -1; if (ctx == NULL) { @@ -1040,3 +715,5 @@ int ec_GF2m_simple_field_div(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, { return BN_GF2m_mod_div(r, a, b, &group->field, ctx); } + +#endif diff --git a/app/openssl/crypto/ec/ec_ameth.c b/app/openssl/crypto/ec/ec_ameth.c index c00f7d74..0ce45240 100644 --- a/app/openssl/crypto/ec/ec_ameth.c +++ b/app/openssl/crypto/ec/ec_ameth.c @@ -88,7 +88,7 @@ static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) if (!pstr) return 0; pstr->length = i2d_ECParameters(ec_key, &pstr->data); - if (pstr->length < 0) + if (pstr->length <= 0) { ASN1_STRING_free(pstr); ECerr(EC_F_ECKEY_PARAM2TYPE, ERR_R_EC_LIB); @@ -651,6 +651,7 @@ const EVP_PKEY_ASN1_METHOD eckey_asn1_meth = ec_copy_parameters, ec_cmp_parameters, eckey_param_print, + 0, int_ec_free, ec_pkey_ctrl, diff --git a/app/openssl/crypto/ec/ec_asn1.c b/app/openssl/crypto/ec/ec_asn1.c index ae555398..145807b6 100644 --- a/app/openssl/crypto/ec/ec_asn1.c +++ b/app/openssl/crypto/ec/ec_asn1.c @@ -83,13 +83,14 @@ int EC_GROUP_get_basis_type(const EC_GROUP *group) /* everything else is currently not supported */ return 0; } - +#ifndef OPENSSL_NO_EC2M int EC_GROUP_get_trinomial_basis(const EC_GROUP *group, unsigned int *k) { if (group == NULL) return 0; - if (EC_GROUP_method_of(group)->group_set_curve != ec_GF2m_simple_group_set_curve + if (EC_METHOD_get_field_type(EC_GROUP_method_of(group)) != + NID_X9_62_characteristic_two_field || !((group->poly[0] != 0) && (group->poly[1] != 0) && (group->poly[2] == 0))) { ECerr(EC_F_EC_GROUP_GET_TRINOMIAL_BASIS, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); @@ -101,14 +102,14 @@ int EC_GROUP_get_trinomial_basis(const EC_GROUP *group, unsigned int *k) return 1; } - int EC_GROUP_get_pentanomial_basis(const EC_GROUP *group, unsigned int *k1, unsigned int *k2, unsigned int *k3) { if (group == NULL) return 0; - if (EC_GROUP_method_of(group)->group_set_curve != ec_GF2m_simple_group_set_curve + if (EC_METHOD_get_field_type(EC_GROUP_method_of(group)) != + NID_X9_62_characteristic_two_field || !((group->poly[0] != 0) && (group->poly[1] != 0) && (group->poly[2] != 0) && (group->poly[3] != 0) && (group->poly[4] == 0))) { ECerr(EC_F_EC_GROUP_GET_PENTANOMIAL_BASIS, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); @@ -124,7 +125,7 @@ int EC_GROUP_get_pentanomial_basis(const EC_GROUP *group, unsigned int *k1, return 1; } - +#endif /* some structures needed for the asn1 encoding */ @@ -340,6 +341,12 @@ static int ec_asn1_group2fieldid(const EC_GROUP *group, X9_62_FIELDID *field) } } else /* nid == NID_X9_62_characteristic_two_field */ +#ifdef OPENSSL_NO_EC2M + { + ECerr(EC_F_EC_ASN1_GROUP2FIELDID, EC_R_GF2M_NOT_SUPPORTED); + goto err; + } +#else { int field_type; X9_62_CHARACTERISTIC_TWO *char_two; @@ -419,6 +426,7 @@ static int ec_asn1_group2fieldid(const EC_GROUP *group, X9_62_FIELDID *field) } } } +#endif ok = 1; @@ -456,6 +464,7 @@ static int ec_asn1_group2curve(const EC_GROUP *group, X9_62_CURVE *curve) goto err; } } +#ifndef OPENSSL_NO_EC2M else /* nid == NID_X9_62_characteristic_two_field */ { if (!EC_GROUP_get_curve_GF2m(group, NULL, tmp_1, tmp_2, NULL)) @@ -464,7 +473,7 @@ static int ec_asn1_group2curve(const EC_GROUP *group, X9_62_CURVE *curve) goto err; } } - +#endif len_1 = (size_t)BN_num_bytes(tmp_1); len_2 = (size_t)BN_num_bytes(tmp_2); @@ -775,8 +784,13 @@ static EC_GROUP *ec_asn1_parameters2group(const ECPARAMETERS *params) /* get the field parameters */ tmp = OBJ_obj2nid(params->fieldID->fieldType); - if (tmp == NID_X9_62_characteristic_two_field) +#ifdef OPENSSL_NO_EC2M + { + ECerr(EC_F_EC_ASN1_PARAMETERS2GROUP, EC_R_GF2M_NOT_SUPPORTED); + goto err; + } +#else { X9_62_CHARACTERISTIC_TWO *char_two; @@ -862,6 +876,7 @@ static EC_GROUP *ec_asn1_parameters2group(const ECPARAMETERS *params) /* create the EC_GROUP structure */ ret = EC_GROUP_new_curve_GF2m(p, a, b, NULL); } +#endif else if (tmp == NID_X9_62_prime_field) { /* we have a curve over a prime field */ @@ -1065,6 +1080,7 @@ EC_GROUP *d2i_ECPKParameters(EC_GROUP **a, const unsigned char **in, long len) if ((group = ec_asn1_pkparameters2group(params)) == NULL) { ECerr(EC_F_D2I_ECPKPARAMETERS, EC_R_PKPARAMETERS2GROUP_FAILURE); + ECPKPARAMETERS_free(params); return NULL; } diff --git a/app/openssl/crypto/ec/ec_curve.c b/app/openssl/crypto/ec/ec_curve.c index 23274e40..c72fb269 100644 --- a/app/openssl/crypto/ec/ec_curve.c +++ b/app/openssl/crypto/ec/ec_curve.c @@ -3,7 +3,7 @@ * Written by Nils Larsch for the OpenSSL project. */ /* ==================================================================== - * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved. + * Copyright (c) 1998-2010 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,6 +72,7 @@ #include "ec_lcl.h" #include #include +#include typedef struct { int field_type, /* either NID_X9_62_prime_field or @@ -703,6 +704,8 @@ static const struct { EC_CURVE_DATA h; unsigned char data[0+28*6]; } 0x13,0xDD,0x29,0x45,0x5C,0x5C,0x2A,0x3D } }; +#ifndef OPENSSL_NO_EC2M + /* characteristic two curves */ static const struct { EC_CURVE_DATA h; unsigned char data[20+15*6]; } _EC_SECG_CHAR2_113R1 = { @@ -1300,7 +1303,7 @@ static const struct { EC_CURVE_DATA h; unsigned char data[20+21*6]; } { 0x53,0x81,0x4C,0x05,0x0D,0x44,0xD6,0x96,0xE6,0x76, /* seed */ 0x87,0x56,0x15,0x17,0x58,0x0C,0xA4,0xE2,0x9F,0xFD, - 0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p */ + 0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01, 0x07, 0x01,0x08,0xB3,0x9E,0x77,0xC4,0xB1,0x08,0xBE,0xD9, /* a */ @@ -1817,103 +1820,128 @@ static const struct { EC_CURVE_DATA h; unsigned char data[0+24*6]; } 0xBA,0xFC,0xA7,0x5E } }; +#endif + typedef struct _ec_list_element_st { int nid; const EC_CURVE_DATA *data; + const EC_METHOD *(*meth)(void); const char *comment; } ec_list_element; static const ec_list_element curve_list[] = { - /* prime field curves */ + /* prime field curves */ /* secg curves */ - { NID_secp112r1, &_EC_SECG_PRIME_112R1.h, "SECG/WTLS curve over a 112 bit prime field"}, - { NID_secp112r2, &_EC_SECG_PRIME_112R2.h, "SECG curve over a 112 bit prime field"}, - { NID_secp128r1, &_EC_SECG_PRIME_128R1.h, "SECG curve over a 128 bit prime field"}, - { NID_secp128r2, &_EC_SECG_PRIME_128R2.h, "SECG curve over a 128 bit prime field"}, - { NID_secp160k1, &_EC_SECG_PRIME_160K1.h, "SECG curve over a 160 bit prime field"}, - { NID_secp160r1, &_EC_SECG_PRIME_160R1.h, "SECG curve over a 160 bit prime field"}, - { NID_secp160r2, &_EC_SECG_PRIME_160R2.h, "SECG/WTLS curve over a 160 bit prime field"}, + { NID_secp112r1, &_EC_SECG_PRIME_112R1.h, 0, "SECG/WTLS curve over a 112 bit prime field" }, + { NID_secp112r2, &_EC_SECG_PRIME_112R2.h, 0, "SECG curve over a 112 bit prime field" }, + { NID_secp128r1, &_EC_SECG_PRIME_128R1.h, 0, "SECG curve over a 128 bit prime field" }, + { NID_secp128r2, &_EC_SECG_PRIME_128R2.h, 0, "SECG curve over a 128 bit prime field" }, + { NID_secp160k1, &_EC_SECG_PRIME_160K1.h, 0, "SECG curve over a 160 bit prime field" }, + { NID_secp160r1, &_EC_SECG_PRIME_160R1.h, 0, "SECG curve over a 160 bit prime field" }, + { NID_secp160r2, &_EC_SECG_PRIME_160R2.h, 0, "SECG/WTLS curve over a 160 bit prime field" }, /* SECG secp192r1 is the same as X9.62 prime192v1 and hence omitted */ - { NID_secp192k1, &_EC_SECG_PRIME_192K1.h, "SECG curve over a 192 bit prime field"}, - { NID_secp224k1, &_EC_SECG_PRIME_224K1.h, "SECG curve over a 224 bit prime field"}, - { NID_secp224r1, &_EC_NIST_PRIME_224.h, "NIST/SECG curve over a 224 bit prime field"}, - { NID_secp256k1, &_EC_SECG_PRIME_256K1.h, "SECG curve over a 256 bit prime field"}, + { NID_secp192k1, &_EC_SECG_PRIME_192K1.h, 0, "SECG curve over a 192 bit prime field" }, + { NID_secp224k1, &_EC_SECG_PRIME_224K1.h, 0, "SECG curve over a 224 bit prime field" }, +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 + { NID_secp224r1, &_EC_NIST_PRIME_224.h, EC_GFp_nistp224_method, "NIST/SECG curve over a 224 bit prime field" }, +#else + { NID_secp224r1, &_EC_NIST_PRIME_224.h, 0, "NIST/SECG curve over a 224 bit prime field" }, +#endif + { NID_secp256k1, &_EC_SECG_PRIME_256K1.h, 0, "SECG curve over a 256 bit prime field" }, /* SECG secp256r1 is the same as X9.62 prime256v1 and hence omitted */ - { NID_secp384r1, &_EC_NIST_PRIME_384.h, "NIST/SECG curve over a 384 bit prime field"}, - { NID_secp521r1, &_EC_NIST_PRIME_521.h, "NIST/SECG curve over a 521 bit prime field"}, + { NID_secp384r1, &_EC_NIST_PRIME_384.h, 0, "NIST/SECG curve over a 384 bit prime field" }, +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 + { NID_secp521r1, &_EC_NIST_PRIME_521.h, EC_GFp_nistp521_method, "NIST/SECG curve over a 521 bit prime field" }, +#else + { NID_secp521r1, &_EC_NIST_PRIME_521.h, 0, "NIST/SECG curve over a 521 bit prime field" }, +#endif /* X9.62 curves */ - { NID_X9_62_prime192v1, &_EC_NIST_PRIME_192.h, "NIST/X9.62/SECG curve over a 192 bit prime field"}, - { NID_X9_62_prime192v2, &_EC_X9_62_PRIME_192V2.h, "X9.62 curve over a 192 bit prime field"}, - { NID_X9_62_prime192v3, &_EC_X9_62_PRIME_192V3.h, "X9.62 curve over a 192 bit prime field"}, - { NID_X9_62_prime239v1, &_EC_X9_62_PRIME_239V1.h, "X9.62 curve over a 239 bit prime field"}, - { NID_X9_62_prime239v2, &_EC_X9_62_PRIME_239V2.h, "X9.62 curve over a 239 bit prime field"}, - { NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, "X9.62 curve over a 239 bit prime field"}, - { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, "X9.62/SECG curve over a 256 bit prime field"}, + { NID_X9_62_prime192v1, &_EC_NIST_PRIME_192.h, 0, "NIST/X9.62/SECG curve over a 192 bit prime field" }, + { NID_X9_62_prime192v2, &_EC_X9_62_PRIME_192V2.h, 0, "X9.62 curve over a 192 bit prime field" }, + { NID_X9_62_prime192v3, &_EC_X9_62_PRIME_192V3.h, 0, "X9.62 curve over a 192 bit prime field" }, + { NID_X9_62_prime239v1, &_EC_X9_62_PRIME_239V1.h, 0, "X9.62 curve over a 239 bit prime field" }, + { NID_X9_62_prime239v2, &_EC_X9_62_PRIME_239V2.h, 0, "X9.62 curve over a 239 bit prime field" }, + { NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, 0, "X9.62 curve over a 239 bit prime field" }, +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 + { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, EC_GFp_nistp256_method, "X9.62/SECG curve over a 256 bit prime field" }, +#else + { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, 0, "X9.62/SECG curve over a 256 bit prime field" }, +#endif +#ifndef OPENSSL_NO_EC2M /* characteristic two field curves */ /* NIST/SECG curves */ - { NID_sect113r1, &_EC_SECG_CHAR2_113R1.h, "SECG curve over a 113 bit binary field"}, - { NID_sect113r2, &_EC_SECG_CHAR2_113R2.h, "SECG curve over a 113 bit binary field"}, - { NID_sect131r1, &_EC_SECG_CHAR2_131R1.h, "SECG/WTLS curve over a 131 bit binary field"}, - { NID_sect131r2, &_EC_SECG_CHAR2_131R2.h, "SECG curve over a 131 bit binary field"}, - { NID_sect163k1, &_EC_NIST_CHAR2_163K.h, "NIST/SECG/WTLS curve over a 163 bit binary field" }, - { NID_sect163r1, &_EC_SECG_CHAR2_163R1.h, "SECG curve over a 163 bit binary field"}, - { NID_sect163r2, &_EC_NIST_CHAR2_163B.h, "NIST/SECG curve over a 163 bit binary field" }, - { NID_sect193r1, &_EC_SECG_CHAR2_193R1.h, "SECG curve over a 193 bit binary field"}, - { NID_sect193r2, &_EC_SECG_CHAR2_193R2.h, "SECG curve over a 193 bit binary field"}, - { NID_sect233k1, &_EC_NIST_CHAR2_233K.h, "NIST/SECG/WTLS curve over a 233 bit binary field" }, - { NID_sect233r1, &_EC_NIST_CHAR2_233B.h, "NIST/SECG/WTLS curve over a 233 bit binary field" }, - { NID_sect239k1, &_EC_SECG_CHAR2_239K1.h, "SECG curve over a 239 bit binary field"}, - { NID_sect283k1, &_EC_NIST_CHAR2_283K.h, "NIST/SECG curve over a 283 bit binary field" }, - { NID_sect283r1, &_EC_NIST_CHAR2_283B.h, "NIST/SECG curve over a 283 bit binary field" }, - { NID_sect409k1, &_EC_NIST_CHAR2_409K.h, "NIST/SECG curve over a 409 bit binary field" }, - { NID_sect409r1, &_EC_NIST_CHAR2_409B.h, "NIST/SECG curve over a 409 bit binary field" }, - { NID_sect571k1, &_EC_NIST_CHAR2_571K.h, "NIST/SECG curve over a 571 bit binary field" }, - { NID_sect571r1, &_EC_NIST_CHAR2_571B.h, "NIST/SECG curve over a 571 bit binary field" }, + { NID_sect113r1, &_EC_SECG_CHAR2_113R1.h, 0, "SECG curve over a 113 bit binary field" }, + { NID_sect113r2, &_EC_SECG_CHAR2_113R2.h, 0, "SECG curve over a 113 bit binary field" }, + { NID_sect131r1, &_EC_SECG_CHAR2_131R1.h, 0, "SECG/WTLS curve over a 131 bit binary field" }, + { NID_sect131r2, &_EC_SECG_CHAR2_131R2.h, 0, "SECG curve over a 131 bit binary field" }, + { NID_sect163k1, &_EC_NIST_CHAR2_163K.h, 0, "NIST/SECG/WTLS curve over a 163 bit binary field" }, + { NID_sect163r1, &_EC_SECG_CHAR2_163R1.h, 0, "SECG curve over a 163 bit binary field" }, + { NID_sect163r2, &_EC_NIST_CHAR2_163B.h, 0, "NIST/SECG curve over a 163 bit binary field" }, + { NID_sect193r1, &_EC_SECG_CHAR2_193R1.h, 0, "SECG curve over a 193 bit binary field" }, + { NID_sect193r2, &_EC_SECG_CHAR2_193R2.h, 0, "SECG curve over a 193 bit binary field" }, + { NID_sect233k1, &_EC_NIST_CHAR2_233K.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" }, + { NID_sect233r1, &_EC_NIST_CHAR2_233B.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" }, + { NID_sect239k1, &_EC_SECG_CHAR2_239K1.h, 0, "SECG curve over a 239 bit binary field" }, + { NID_sect283k1, &_EC_NIST_CHAR2_283K.h, 0, "NIST/SECG curve over a 283 bit binary field" }, + { NID_sect283r1, &_EC_NIST_CHAR2_283B.h, 0, "NIST/SECG curve over a 283 bit binary field" }, + { NID_sect409k1, &_EC_NIST_CHAR2_409K.h, 0, "NIST/SECG curve over a 409 bit binary field" }, + { NID_sect409r1, &_EC_NIST_CHAR2_409B.h, 0, "NIST/SECG curve over a 409 bit binary field" }, + { NID_sect571k1, &_EC_NIST_CHAR2_571K.h, 0, "NIST/SECG curve over a 571 bit binary field" }, + { NID_sect571r1, &_EC_NIST_CHAR2_571B.h, 0, "NIST/SECG curve over a 571 bit binary field" }, /* X9.62 curves */ - { NID_X9_62_c2pnb163v1, &_EC_X9_62_CHAR2_163V1.h, "X9.62 curve over a 163 bit binary field"}, - { NID_X9_62_c2pnb163v2, &_EC_X9_62_CHAR2_163V2.h, "X9.62 curve over a 163 bit binary field"}, - { NID_X9_62_c2pnb163v3, &_EC_X9_62_CHAR2_163V3.h, "X9.62 curve over a 163 bit binary field"}, - { NID_X9_62_c2pnb176v1, &_EC_X9_62_CHAR2_176V1.h, "X9.62 curve over a 176 bit binary field"}, - { NID_X9_62_c2tnb191v1, &_EC_X9_62_CHAR2_191V1.h, "X9.62 curve over a 191 bit binary field"}, - { NID_X9_62_c2tnb191v2, &_EC_X9_62_CHAR2_191V2.h, "X9.62 curve over a 191 bit binary field"}, - { NID_X9_62_c2tnb191v3, &_EC_X9_62_CHAR2_191V3.h, "X9.62 curve over a 191 bit binary field"}, - { NID_X9_62_c2pnb208w1, &_EC_X9_62_CHAR2_208W1.h, "X9.62 curve over a 208 bit binary field"}, - { NID_X9_62_c2tnb239v1, &_EC_X9_62_CHAR2_239V1.h, "X9.62 curve over a 239 bit binary field"}, - { NID_X9_62_c2tnb239v2, &_EC_X9_62_CHAR2_239V2.h, "X9.62 curve over a 239 bit binary field"}, - { NID_X9_62_c2tnb239v3, &_EC_X9_62_CHAR2_239V3.h, "X9.62 curve over a 239 bit binary field"}, - { NID_X9_62_c2pnb272w1, &_EC_X9_62_CHAR2_272W1.h, "X9.62 curve over a 272 bit binary field"}, - { NID_X9_62_c2pnb304w1, &_EC_X9_62_CHAR2_304W1.h, "X9.62 curve over a 304 bit binary field"}, - { NID_X9_62_c2tnb359v1, &_EC_X9_62_CHAR2_359V1.h, "X9.62 curve over a 359 bit binary field"}, - { NID_X9_62_c2pnb368w1, &_EC_X9_62_CHAR2_368W1.h, "X9.62 curve over a 368 bit binary field"}, - { NID_X9_62_c2tnb431r1, &_EC_X9_62_CHAR2_431R1.h, "X9.62 curve over a 431 bit binary field"}, + { NID_X9_62_c2pnb163v1, &_EC_X9_62_CHAR2_163V1.h, 0, "X9.62 curve over a 163 bit binary field" }, + { NID_X9_62_c2pnb163v2, &_EC_X9_62_CHAR2_163V2.h, 0, "X9.62 curve over a 163 bit binary field" }, + { NID_X9_62_c2pnb163v3, &_EC_X9_62_CHAR2_163V3.h, 0, "X9.62 curve over a 163 bit binary field" }, + { NID_X9_62_c2pnb176v1, &_EC_X9_62_CHAR2_176V1.h, 0, "X9.62 curve over a 176 bit binary field" }, + { NID_X9_62_c2tnb191v1, &_EC_X9_62_CHAR2_191V1.h, 0, "X9.62 curve over a 191 bit binary field" }, + { NID_X9_62_c2tnb191v2, &_EC_X9_62_CHAR2_191V2.h, 0, "X9.62 curve over a 191 bit binary field" }, + { NID_X9_62_c2tnb191v3, &_EC_X9_62_CHAR2_191V3.h, 0, "X9.62 curve over a 191 bit binary field" }, + { NID_X9_62_c2pnb208w1, &_EC_X9_62_CHAR2_208W1.h, 0, "X9.62 curve over a 208 bit binary field" }, + { NID_X9_62_c2tnb239v1, &_EC_X9_62_CHAR2_239V1.h, 0, "X9.62 curve over a 239 bit binary field" }, + { NID_X9_62_c2tnb239v2, &_EC_X9_62_CHAR2_239V2.h, 0, "X9.62 curve over a 239 bit binary field" }, + { NID_X9_62_c2tnb239v3, &_EC_X9_62_CHAR2_239V3.h, 0, "X9.62 curve over a 239 bit binary field" }, + { NID_X9_62_c2pnb272w1, &_EC_X9_62_CHAR2_272W1.h, 0, "X9.62 curve over a 272 bit binary field" }, + { NID_X9_62_c2pnb304w1, &_EC_X9_62_CHAR2_304W1.h, 0, "X9.62 curve over a 304 bit binary field" }, + { NID_X9_62_c2tnb359v1, &_EC_X9_62_CHAR2_359V1.h, 0, "X9.62 curve over a 359 bit binary field" }, + { NID_X9_62_c2pnb368w1, &_EC_X9_62_CHAR2_368W1.h, 0, "X9.62 curve over a 368 bit binary field" }, + { NID_X9_62_c2tnb431r1, &_EC_X9_62_CHAR2_431R1.h, 0, "X9.62 curve over a 431 bit binary field" }, /* the WAP/WTLS curves * [unlike SECG, spec has its own OIDs for curves from X9.62] */ - { NID_wap_wsg_idm_ecid_wtls1, &_EC_WTLS_1.h, "WTLS curve over a 113 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls3, &_EC_NIST_CHAR2_163K.h, "NIST/SECG/WTLS curve over a 163 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls4, &_EC_SECG_CHAR2_113R1.h, "SECG curve over a 113 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls5, &_EC_X9_62_CHAR2_163V1.h, "X9.62 curve over a 163 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls6, &_EC_SECG_PRIME_112R1.h, "SECG/WTLS curve over a 112 bit prime field"}, - { NID_wap_wsg_idm_ecid_wtls7, &_EC_SECG_PRIME_160R2.h, "SECG/WTLS curve over a 160 bit prime field"}, - { NID_wap_wsg_idm_ecid_wtls8, &_EC_WTLS_8.h, "WTLS curve over a 112 bit prime field"}, - { NID_wap_wsg_idm_ecid_wtls9, &_EC_WTLS_9.h, "WTLS curve over a 160 bit prime field" }, - { NID_wap_wsg_idm_ecid_wtls10, &_EC_NIST_CHAR2_233K.h, "NIST/SECG/WTLS curve over a 233 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls11, &_EC_NIST_CHAR2_233B.h, "NIST/SECG/WTLS curve over a 233 bit binary field"}, - { NID_wap_wsg_idm_ecid_wtls12, &_EC_WTLS_12.h, "WTLS curvs over a 224 bit prime field"}, + { NID_wap_wsg_idm_ecid_wtls1, &_EC_WTLS_1.h, 0, "WTLS curve over a 113 bit binary field" }, + { NID_wap_wsg_idm_ecid_wtls3, &_EC_NIST_CHAR2_163K.h, 0, "NIST/SECG/WTLS curve over a 163 bit binary field" }, + { NID_wap_wsg_idm_ecid_wtls4, &_EC_SECG_CHAR2_113R1.h, 0, "SECG curve over a 113 bit binary field" }, + { NID_wap_wsg_idm_ecid_wtls5, &_EC_X9_62_CHAR2_163V1.h, 0, "X9.62 curve over a 163 bit binary field" }, +#endif + { NID_wap_wsg_idm_ecid_wtls6, &_EC_SECG_PRIME_112R1.h, 0, "SECG/WTLS curve over a 112 bit prime field" }, + { NID_wap_wsg_idm_ecid_wtls7, &_EC_SECG_PRIME_160R2.h, 0, "SECG/WTLS curve over a 160 bit prime field" }, + { NID_wap_wsg_idm_ecid_wtls8, &_EC_WTLS_8.h, 0, "WTLS curve over a 112 bit prime field" }, + { NID_wap_wsg_idm_ecid_wtls9, &_EC_WTLS_9.h, 0, "WTLS curve over a 160 bit prime field" }, +#ifndef OPENSSL_NO_EC2M + { NID_wap_wsg_idm_ecid_wtls10, &_EC_NIST_CHAR2_233K.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" }, + { NID_wap_wsg_idm_ecid_wtls11, &_EC_NIST_CHAR2_233B.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" }, +#endif + { NID_wap_wsg_idm_ecid_wtls12, &_EC_WTLS_12.h, 0, "WTLS curvs over a 224 bit prime field" }, +#ifndef OPENSSL_NO_EC2M /* IPSec curves */ - { NID_ipsec3, &_EC_IPSEC_155_ID3.h, "\n\tIPSec/IKE/Oakley curve #3 over a 155 bit binary field.\n""\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, - { NID_ipsec4, &_EC_IPSEC_185_ID4.h, "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n""\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, + { NID_ipsec3, &_EC_IPSEC_155_ID3.h, 0, "\n\tIPSec/IKE/Oakley curve #3 over a 155 bit binary field.\n" + "\tNot suitable for ECDSA.\n\tQuestionable extension field!" }, + { NID_ipsec4, &_EC_IPSEC_185_ID4.h, 0, "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n" + "\tNot suitable for ECDSA.\n\tQuestionable extension field!" }, +#endif }; #define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element)) -static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) +static EC_GROUP *ec_group_new_from_data(const ec_list_element curve) { EC_GROUP *group=NULL; EC_POINT *P=NULL; BN_CTX *ctx=NULL; - BIGNUM *p=NULL, *a=NULL, *b=NULL, *x=NULL, *y=NULL, *order=NULL; + BIGNUM *p=NULL, *a=NULL, *b=NULL, *x=NULL, *y=NULL, *order=NULL; int ok=0; int seed_len,param_len; + const EC_METHOD *meth; + const EC_CURVE_DATA *data; const unsigned char *params; if ((ctx = BN_CTX_new()) == NULL) @@ -1922,10 +1950,11 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) goto err; } + data = curve.data; seed_len = data->seed_len; param_len = data->param_len; - params = (const unsigned char *)(data+1); /* skip header */ - params += seed_len; /* skip seed */ + params = (const unsigned char *)(data+1); /* skip header */ + params += seed_len; /* skip seed */ if (!(p = BN_bin2bn(params+0*param_len, param_len, NULL)) || !(a = BN_bin2bn(params+1*param_len, param_len, NULL)) @@ -1935,7 +1964,17 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) goto err; } - if (data->field_type == NID_X9_62_prime_field) + if (curve.meth != 0) + { + meth = curve.meth(); + if (((group = EC_GROUP_new(meth)) == NULL) || + (!(group->meth->group_set_curve(group, p, a, b, ctx)))) + { + ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB); + goto err; + } + } + else if (data->field_type == NID_X9_62_prime_field) { if ((group = EC_GROUP_new_curve_GFp(p, a, b, ctx)) == NULL) { @@ -1943,6 +1982,7 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) goto err; } } +#ifndef OPENSSL_NO_EC2M else /* field_type == NID_X9_62_characteristic_two_field */ { if ((group = EC_GROUP_new_curve_GF2m(p, a, b, ctx)) == NULL) @@ -1951,20 +1991,21 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) goto err; } } +#endif if ((P = EC_POINT_new(group)) == NULL) { ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB); goto err; } - + if (!(x = BN_bin2bn(params+3*param_len, param_len, NULL)) || !(y = BN_bin2bn(params+4*param_len, param_len, NULL))) { ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_BN_LIB); goto err; } - if (!EC_POINT_set_affine_coordinates_GF2m(group, P, x, y, ctx)) + if (!EC_POINT_set_affine_coordinates_GFp(group, P, x, y, ctx)) { ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB); goto err; @@ -2025,7 +2066,7 @@ EC_GROUP *EC_GROUP_new_by_curve_name(int nid) for (i=0; i + */ + meth = EC_GFp_mont_method(); +#else meth = EC_GFp_nist_method(); +#endif ret = EC_GROUP_new(meth); if (ret == NULL) @@ -122,7 +147,7 @@ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM return ret; } - +#ifndef OPENSSL_NO_EC2M EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { const EC_METHOD *meth; @@ -142,3 +167,4 @@ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM return ret; } +#endif diff --git a/app/openssl/crypto/ec/ec_err.c b/app/openssl/crypto/ec/ec_err.c index 84b48333..0d193987 100644 --- a/app/openssl/crypto/ec/ec_err.c +++ b/app/openssl/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* crypto/ec/ec_err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,6 +70,7 @@ static ERR_STRING_DATA EC_str_functs[]= { +{ERR_FUNC(EC_F_BN_TO_FELEM), "BN_TO_FELEM"}, {ERR_FUNC(EC_F_COMPUTE_WNAF), "COMPUTE_WNAF"}, {ERR_FUNC(EC_F_D2I_ECPARAMETERS), "d2i_ECParameters"}, {ERR_FUNC(EC_F_D2I_ECPKPARAMETERS), "d2i_ECPKParameters"}, @@ -112,6 +113,15 @@ static ERR_STRING_DATA EC_str_functs[]= {ERR_FUNC(EC_F_EC_GFP_MONT_FIELD_SQR), "ec_GFp_mont_field_sqr"}, {ERR_FUNC(EC_F_EC_GFP_MONT_GROUP_SET_CURVE), "ec_GFp_mont_group_set_curve"}, {ERR_FUNC(EC_F_EC_GFP_MONT_GROUP_SET_CURVE_GFP), "EC_GFP_MONT_GROUP_SET_CURVE_GFP"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE), "ec_GFp_nistp224_group_set_curve"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP224_POINTS_MUL), "ec_GFp_nistp224_points_mul"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES), "ec_GFp_nistp224_point_get_affine_coordinates"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE), "ec_GFp_nistp256_group_set_curve"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP256_POINTS_MUL), "ec_GFp_nistp256_points_mul"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES), "ec_GFp_nistp256_point_get_affine_coordinates"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE), "ec_GFp_nistp521_group_set_curve"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP521_POINTS_MUL), "ec_GFp_nistp521_points_mul"}, +{ERR_FUNC(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES), "ec_GFp_nistp521_point_get_affine_coordinates"}, {ERR_FUNC(EC_F_EC_GFP_NIST_FIELD_MUL), "ec_GFp_nist_field_mul"}, {ERR_FUNC(EC_F_EC_GFP_NIST_FIELD_SQR), "ec_GFp_nist_field_sqr"}, {ERR_FUNC(EC_F_EC_GFP_NIST_GROUP_SET_CURVE), "ec_GFp_nist_group_set_curve"}, @@ -154,6 +164,7 @@ static ERR_STRING_DATA EC_str_functs[]= {ERR_FUNC(EC_F_EC_KEY_NEW), "EC_KEY_new"}, {ERR_FUNC(EC_F_EC_KEY_PRINT), "EC_KEY_print"}, {ERR_FUNC(EC_F_EC_KEY_PRINT_FP), "EC_KEY_print_fp"}, +{ERR_FUNC(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES), "EC_KEY_set_public_key_affine_coordinates"}, {ERR_FUNC(EC_F_EC_POINTS_MAKE_AFFINE), "EC_POINTs_make_affine"}, {ERR_FUNC(EC_F_EC_POINT_ADD), "EC_POINT_add"}, {ERR_FUNC(EC_F_EC_POINT_CMP), "EC_POINT_cmp"}, @@ -184,6 +195,9 @@ static ERR_STRING_DATA EC_str_functs[]= {ERR_FUNC(EC_F_I2D_ECPKPARAMETERS), "i2d_ECPKParameters"}, {ERR_FUNC(EC_F_I2D_ECPRIVATEKEY), "i2d_ECPrivateKey"}, {ERR_FUNC(EC_F_I2O_ECPUBLICKEY), "i2o_ECPublicKey"}, +{ERR_FUNC(EC_F_NISTP224_PRE_COMP_NEW), "NISTP224_PRE_COMP_NEW"}, +{ERR_FUNC(EC_F_NISTP256_PRE_COMP_NEW), "NISTP256_PRE_COMP_NEW"}, +{ERR_FUNC(EC_F_NISTP521_PRE_COMP_NEW), "NISTP521_PRE_COMP_NEW"}, {ERR_FUNC(EC_F_O2I_ECPUBLICKEY), "o2i_ECPublicKey"}, {ERR_FUNC(EC_F_OLD_EC_PRIV_DECODE), "OLD_EC_PRIV_DECODE"}, {ERR_FUNC(EC_F_PKEY_EC_CTRL), "PKEY_EC_CTRL"}, @@ -199,12 +213,15 @@ static ERR_STRING_DATA EC_str_reasons[]= { {ERR_REASON(EC_R_ASN1_ERROR) ,"asn1 error"}, {ERR_REASON(EC_R_ASN1_UNKNOWN_FIELD) ,"asn1 unknown field"}, +{ERR_REASON(EC_R_BIGNUM_OUT_OF_RANGE) ,"bignum out of range"}, {ERR_REASON(EC_R_BUFFER_TOO_SMALL) ,"buffer too small"}, +{ERR_REASON(EC_R_COORDINATES_OUT_OF_RANGE),"coordinates out of range"}, {ERR_REASON(EC_R_D2I_ECPKPARAMETERS_FAILURE),"d2i ecpkparameters failure"}, {ERR_REASON(EC_R_DECODE_ERROR) ,"decode error"}, {ERR_REASON(EC_R_DISCRIMINANT_IS_ZERO) ,"discriminant is zero"}, {ERR_REASON(EC_R_EC_GROUP_NEW_BY_NAME_FAILURE),"ec group new by name failure"}, {ERR_REASON(EC_R_FIELD_TOO_LARGE) ,"field too large"}, +{ERR_REASON(EC_R_GF2M_NOT_SUPPORTED) ,"gf2m not supported"}, {ERR_REASON(EC_R_GROUP2PKPARAMETERS_FAILURE),"group2pkparameters failure"}, {ERR_REASON(EC_R_I2D_ECPKPARAMETERS_FAILURE),"i2d ecpkparameters failure"}, {ERR_REASON(EC_R_INCOMPATIBLE_OBJECTS) ,"incompatible objects"}, @@ -239,6 +256,7 @@ static ERR_STRING_DATA EC_str_reasons[]= {ERR_REASON(EC_R_UNKNOWN_GROUP) ,"unknown group"}, {ERR_REASON(EC_R_UNKNOWN_ORDER) ,"unknown order"}, {ERR_REASON(EC_R_UNSUPPORTED_FIELD) ,"unsupported field"}, +{ERR_REASON(EC_R_WRONG_CURVE_PARAMETERS) ,"wrong curve parameters"}, {ERR_REASON(EC_R_WRONG_ORDER) ,"wrong order"}, {0,NULL} }; diff --git a/app/openssl/crypto/ec/ec_key.c b/app/openssl/crypto/ec/ec_key.c index 522802c0..73dd7b97 100644 --- a/app/openssl/crypto/ec/ec_key.c +++ b/app/openssl/crypto/ec/ec_key.c @@ -64,7 +64,9 @@ #include #include "ec_lcl.h" #include -#include +#ifdef OPENSSL_FIPS +#include +#endif EC_KEY *EC_KEY_new(void) { @@ -78,10 +80,12 @@ EC_KEY *EC_KEY_new(void) } ret->version = 1; + ret->flags = 0; ret->group = NULL; ret->pub_key = NULL; ret->priv_key= NULL; ret->enc_flag= 0; + ret->nonce_from_hash_flag = 0; ret->conv_form = POINT_CONVERSION_UNCOMPRESSED; ret->references= 1; ret->method_data = NULL; @@ -195,8 +199,10 @@ EC_KEY *EC_KEY_copy(EC_KEY *dest, const EC_KEY *src) /* copy the rest */ dest->enc_flag = src->enc_flag; + dest->nonce_from_hash_flag = src->nonce_from_hash_flag; dest->conv_form = src->conv_form; dest->version = src->version; + dest->flags = src->flags; return dest; } @@ -237,6 +243,11 @@ int EC_KEY_generate_key(EC_KEY *eckey) BIGNUM *priv_key = NULL, *order = NULL; EC_POINT *pub_key = NULL; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_key_generate_key(eckey); +#endif + if (!eckey || !eckey->group) { ECerr(EC_F_EC_KEY_GENERATE_KEY, ERR_R_PASSED_NULL_PARAMETER); @@ -371,6 +382,82 @@ err: return(ok); } +int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y) + { + BN_CTX *ctx = NULL; + BIGNUM *tx, *ty; + EC_POINT *point = NULL; + int ok = 0, tmp_nid, is_char_two = 0; + + if (!key || !key->group || !x || !y) + { + ECerr(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES, + ERR_R_PASSED_NULL_PARAMETER); + return 0; + } + ctx = BN_CTX_new(); + if (!ctx) + goto err; + + point = EC_POINT_new(key->group); + + if (!point) + goto err; + + tmp_nid = EC_METHOD_get_field_type(EC_GROUP_method_of(key->group)); + + if (tmp_nid == NID_X9_62_characteristic_two_field) + is_char_two = 1; + + tx = BN_CTX_get(ctx); + ty = BN_CTX_get(ctx); +#ifndef OPENSSL_NO_EC2M + if (is_char_two) + { + if (!EC_POINT_set_affine_coordinates_GF2m(key->group, point, + x, y, ctx)) + goto err; + if (!EC_POINT_get_affine_coordinates_GF2m(key->group, point, + tx, ty, ctx)) + goto err; + } + else +#endif + { + if (!EC_POINT_set_affine_coordinates_GFp(key->group, point, + x, y, ctx)) + goto err; + if (!EC_POINT_get_affine_coordinates_GFp(key->group, point, + tx, ty, ctx)) + goto err; + } + /* Check if retrieved coordinates match originals: if not values + * are out of range. + */ + if (BN_cmp(x, tx) || BN_cmp(y, ty)) + { + ECerr(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES, + EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + + if (!EC_KEY_set_public_key(key, point)) + goto err; + + if (EC_KEY_check_key(key) == 0) + goto err; + + ok = 1; + + err: + if (ctx) + BN_CTX_free(ctx); + if (point) + EC_POINT_free(point); + return ok; + + } + const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key) { return key->group; @@ -420,6 +507,16 @@ void EC_KEY_set_enc_flags(EC_KEY *key, unsigned int flags) key->enc_flag = flags; } +int EC_KEY_get_nonce_from_hash(const EC_KEY *key) + { + return key->nonce_from_hash_flag; + } + +void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on) + { + key->nonce_from_hash_flag = on != 0; + } + point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key) { return key->conv_form; @@ -435,18 +532,27 @@ void EC_KEY_set_conv_form(EC_KEY *key, point_conversion_form_t cform) void *EC_KEY_get_key_method_data(EC_KEY *key, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)) { - return EC_EX_DATA_get_data(key->method_data, dup_func, free_func, clear_free_func); + void *ret; + + CRYPTO_r_lock(CRYPTO_LOCK_EC); + ret = EC_EX_DATA_get_data(key->method_data, dup_func, free_func, clear_free_func); + CRYPTO_r_unlock(CRYPTO_LOCK_EC); + + return ret; } -void EC_KEY_insert_key_method_data(EC_KEY *key, void *data, +void *EC_KEY_insert_key_method_data(EC_KEY *key, void *data, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)) { EC_EXTRA_DATA *ex_data; + CRYPTO_w_lock(CRYPTO_LOCK_EC); ex_data = EC_EX_DATA_get_data(key->method_data, dup_func, free_func, clear_free_func); if (ex_data == NULL) EC_EX_DATA_set_data(&key->method_data, data, dup_func, free_func, clear_free_func); CRYPTO_w_unlock(CRYPTO_LOCK_EC); + + return ex_data; } void EC_KEY_set_asn1_flag(EC_KEY *key, int flag) @@ -461,3 +567,18 @@ int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx) return 0; return EC_GROUP_precompute_mult(key->group, ctx); } + +int EC_KEY_get_flags(const EC_KEY *key) + { + return key->flags; + } + +void EC_KEY_set_flags(EC_KEY *key, int flags) + { + key->flags |= flags; + } + +void EC_KEY_clear_flags(EC_KEY *key, int flags) + { + key->flags &= ~flags; + } diff --git a/app/openssl/crypto/ec/ec_lcl.h b/app/openssl/crypto/ec/ec_lcl.h index 3e2c34b0..6f714c75 100644 --- a/app/openssl/crypto/ec/ec_lcl.h +++ b/app/openssl/crypto/ec/ec_lcl.h @@ -3,7 +3,7 @@ * Originally written by Bodo Moeller for the OpenSSL project. */ /* ==================================================================== - * Copyright (c) 1998-2003 The OpenSSL Project. All rights reserved. + * Copyright (c) 1998-2010 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -82,10 +82,15 @@ # endif #endif +/* Use default functions for poin2oct, oct2point and compressed coordinates */ +#define EC_FLAGS_DEFAULT_OCT 0x1 + /* Structure details are not part of the exported interface, * so all this may change in future versions. */ struct ec_method_st { + /* Various method flags */ + int flags; /* used by EC_METHOD_get_field_type: */ int field_type; /* a NID */ @@ -241,9 +246,11 @@ struct ec_key_st { BIGNUM *priv_key; unsigned int enc_flag; + char nonce_from_hash_flag; point_conversion_form_t conv_form; int references; + int flags; EC_EXTRA_DATA *method_data; } /* EC_KEY */; @@ -391,3 +398,50 @@ int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *); int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); int ec_GF2m_have_precompute_mult(const EC_GROUP *group); + +/* method functions in ec2_mult.c */ +int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, + size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *); +int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); +int ec_GF2m_have_precompute_mult(const EC_GROUP *group); + +#ifndef OPENSSL_EC_NISTP_64_GCC_128 +/* method functions in ecp_nistp224.c */ +int ec_GFp_nistp224_group_init(EC_GROUP *group); +int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); +int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group, const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); +int ec_GFp_nistp224_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *); +int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *ctx); +int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx); +int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group); + +/* method functions in ecp_nistp256.c */ +int ec_GFp_nistp256_group_init(EC_GROUP *group); +int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); +int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group, const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); +int ec_GFp_nistp256_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *); +int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *ctx); +int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx); +int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group); + +/* method functions in ecp_nistp521.c */ +int ec_GFp_nistp521_group_init(EC_GROUP *group); +int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *); +int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group, const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); +int ec_GFp_nistp521_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *); +int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *ctx); +int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx); +int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group); + +/* utility functions in ecp_nistputil.c */ +void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array, + size_t felem_size, void *tmp_felems, + void (*felem_one)(void *out), + int (*felem_is_zero)(const void *in), + void (*felem_assign)(void *out, const void *in), + void (*felem_square)(void *out, const void *in), + void (*felem_mul)(void *out, const void *in1, const void *in2), + void (*felem_inv)(void *out, const void *in), + void (*felem_contract)(void *out, const void *in)); +void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign, unsigned char *digit, unsigned char in); +#endif diff --git a/app/openssl/crypto/ec/ec_lib.c b/app/openssl/crypto/ec/ec_lib.c index dd7da0fc..de9a0cc2 100644 --- a/app/openssl/crypto/ec/ec_lib.c +++ b/app/openssl/crypto/ec/ec_lib.c @@ -425,7 +425,7 @@ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM * return group->meth->group_get_curve(group, p, a, b, ctx); } - +#ifndef OPENSSL_NO_EC2M int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { if (group->meth->group_set_curve == 0) @@ -446,7 +446,7 @@ int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM } return group->meth->group_get_curve(group, p, a, b, ctx); } - +#endif int EC_GROUP_get_degree(const EC_GROUP *group) { @@ -480,10 +480,10 @@ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx) if (EC_METHOD_get_field_type(EC_GROUP_method_of(a)) != EC_METHOD_get_field_type(EC_GROUP_method_of(b))) return 1; - /* compare the curve name (if present) */ + /* compare the curve name (if present in both) */ if (EC_GROUP_get_curve_name(a) && EC_GROUP_get_curve_name(b) && - EC_GROUP_get_curve_name(a) == EC_GROUP_get_curve_name(b)) - return 0; + EC_GROUP_get_curve_name(a) != EC_GROUP_get_curve_name(b)) + return 1; if (!ctx) ctx_new = ctx = BN_CTX_new(); @@ -856,7 +856,7 @@ int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *point, return group->meth->point_set_affine_coordinates(group, point, x, y, ctx); } - +#ifndef OPENSSL_NO_EC2M int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *point, const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx) { @@ -872,7 +872,7 @@ int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *point, } return group->meth->point_set_affine_coordinates(group, point, x, y, ctx); } - +#endif int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) @@ -890,7 +890,7 @@ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, const EC_POINT *p return group->meth->point_get_affine_coordinates(group, point, x, y, ctx); } - +#ifndef OPENSSL_NO_EC2M int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) { @@ -906,75 +906,7 @@ int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, const EC_POINT * } return group->meth->point_get_affine_coordinates(group, point, x, y, ctx); } - - -int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *point, - const BIGNUM *x, int y_bit, BN_CTX *ctx) - { - if (group->meth->point_set_compressed_coordinates == 0) - { - ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return 0; - } - if (group->meth != point->meth) - { - ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, EC_R_INCOMPATIBLE_OBJECTS); - return 0; - } - return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx); - } - - -int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *point, - const BIGNUM *x, int y_bit, BN_CTX *ctx) - { - if (group->meth->point_set_compressed_coordinates == 0) - { - ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return 0; - } - if (group->meth != point->meth) - { - ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, EC_R_INCOMPATIBLE_OBJECTS); - return 0; - } - return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx); - } - - -size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, - unsigned char *buf, size_t len, BN_CTX *ctx) - { - if (group->meth->point2oct == 0) - { - ECerr(EC_F_EC_POINT_POINT2OCT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return 0; - } - if (group->meth != point->meth) - { - ECerr(EC_F_EC_POINT_POINT2OCT, EC_R_INCOMPATIBLE_OBJECTS); - return 0; - } - return group->meth->point2oct(group, point, form, buf, len, ctx); - } - - -int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *point, - const unsigned char *buf, size_t len, BN_CTX *ctx) - { - if (group->meth->oct2point == 0) - { - ECerr(EC_F_EC_POINT_OCT2POINT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return 0; - } - if (group->meth != point->meth) - { - ECerr(EC_F_EC_POINT_OCT2POINT, EC_R_INCOMPATIBLE_OBJECTS); - return 0; - } - return group->meth->oct2point(group, point, buf, len, ctx); - } - +#endif int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx) { @@ -1061,12 +993,12 @@ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN if (group->meth->point_cmp == 0) { ECerr(EC_F_EC_POINT_CMP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return 0; + return -1; } if ((group->meth != a->meth) || (a->meth != b->meth)) { ECerr(EC_F_EC_POINT_CMP, EC_R_INCOMPATIBLE_OBJECTS); - return 0; + return -1; } return group->meth->point_cmp(group, a, b, ctx); } diff --git a/app/openssl/crypto/ec/ec_oct.c b/app/openssl/crypto/ec/ec_oct.c new file mode 100644 index 00000000..fd9db079 --- /dev/null +++ b/app/openssl/crypto/ec/ec_oct.c @@ -0,0 +1,199 @@ +/* crypto/ec/ec_lib.c */ +/* + * Originally written by Bodo Moeller for the OpenSSL project. + */ +/* ==================================================================== + * Copyright (c) 1998-2003 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * Binary polynomial ECC support in OpenSSL originally developed by + * SUN MICROSYSTEMS, INC., and contributed to the OpenSSL project. + */ + +#include + +#include +#include + +#include "ec_lcl.h" + +int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *point, + const BIGNUM *x, int y_bit, BN_CTX *ctx) + { + if (group->meth->point_set_compressed_coordinates == 0 + && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT)) + { + ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; + } + if (group->meth != point->meth) + { + ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + if(group->meth->flags & EC_FLAGS_DEFAULT_OCT) + { + if (group->meth->field_type == NID_X9_62_prime_field) + return ec_GFp_simple_set_compressed_coordinates( + group, point, x, y_bit, ctx); + else +#ifdef OPENSSL_NO_EC2M + { + ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, EC_R_GF2M_NOT_SUPPORTED); + return 0; + } +#else + return ec_GF2m_simple_set_compressed_coordinates( + group, point, x, y_bit, ctx); +#endif + } + return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx); + } + +#ifndef OPENSSL_NO_EC2M +int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *point, + const BIGNUM *x, int y_bit, BN_CTX *ctx) + { + if (group->meth->point_set_compressed_coordinates == 0 + && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT)) + { + ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; + } + if (group->meth != point->meth) + { + ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + if(group->meth->flags & EC_FLAGS_DEFAULT_OCT) + { + if (group->meth->field_type == NID_X9_62_prime_field) + return ec_GFp_simple_set_compressed_coordinates( + group, point, x, y_bit, ctx); + else + return ec_GF2m_simple_set_compressed_coordinates( + group, point, x, y_bit, ctx); + } + return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx); + } +#endif + +size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, + unsigned char *buf, size_t len, BN_CTX *ctx) + { + if (group->meth->point2oct == 0 + && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT)) + { + ECerr(EC_F_EC_POINT_POINT2OCT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; + } + if (group->meth != point->meth) + { + ECerr(EC_F_EC_POINT_POINT2OCT, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + if(group->meth->flags & EC_FLAGS_DEFAULT_OCT) + { + if (group->meth->field_type == NID_X9_62_prime_field) + return ec_GFp_simple_point2oct(group, point, + form, buf, len, ctx); + else +#ifdef OPENSSL_NO_EC2M + { + ECerr(EC_F_EC_POINT_POINT2OCT, EC_R_GF2M_NOT_SUPPORTED); + return 0; + } +#else + return ec_GF2m_simple_point2oct(group, point, + form, buf, len, ctx); +#endif + } + + return group->meth->point2oct(group, point, form, buf, len, ctx); + } + + +int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *point, + const unsigned char *buf, size_t len, BN_CTX *ctx) + { + if (group->meth->oct2point == 0 + && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT)) + { + ECerr(EC_F_EC_POINT_OCT2POINT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; + } + if (group->meth != point->meth) + { + ECerr(EC_F_EC_POINT_OCT2POINT, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + if(group->meth->flags & EC_FLAGS_DEFAULT_OCT) + { + if (group->meth->field_type == NID_X9_62_prime_field) + return ec_GFp_simple_oct2point(group, point, + buf, len, ctx); + else +#ifdef OPENSSL_NO_EC2M + { + ECerr(EC_F_EC_POINT_OCT2POINT, EC_R_GF2M_NOT_SUPPORTED); + return 0; + } +#else + return ec_GF2m_simple_oct2point(group, point, + buf, len, ctx); +#endif + } + return group->meth->oct2point(group, point, buf, len, ctx); + } + diff --git a/app/openssl/crypto/ec/ec_pmeth.c b/app/openssl/crypto/ec/ec_pmeth.c index f433076c..66ee397d 100644 --- a/app/openssl/crypto/ec/ec_pmeth.c +++ b/app/openssl/crypto/ec/ec_pmeth.c @@ -188,7 +188,7 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen) pubkey = EC_KEY_get0_public_key(ctx->peerkey->pkey.ec); - /* NB: unlike PKS#3 DH, if *outlen is less than maximum size this is + /* NB: unlike PKCS#3 DH, if *outlen is less than maximum size this is * not an error, the result is truncated. */ @@ -221,6 +221,7 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) case EVP_PKEY_CTRL_MD: if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 && + EVP_MD_type((const EVP_MD *)p2) != NID_ecdsa_with_SHA1 && EVP_MD_type((const EVP_MD *)p2) != NID_sha224 && EVP_MD_type((const EVP_MD *)p2) != NID_sha256 && EVP_MD_type((const EVP_MD *)p2) != NID_sha384 && diff --git a/app/openssl/crypto/ec/eck_prn.c b/app/openssl/crypto/ec/eck_prn.c index 7d3e175a..06de8f39 100644 --- a/app/openssl/crypto/ec/eck_prn.c +++ b/app/openssl/crypto/ec/eck_prn.c @@ -207,7 +207,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) reason = ERR_R_MALLOC_FAILURE; goto err; } - +#ifndef OPENSSL_NO_EC2M if (is_char_two) { if (!EC_GROUP_get_curve_GF2m(x, p, a, b, ctx)) @@ -217,6 +217,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) } } else /* prime field */ +#endif { if (!EC_GROUP_get_curve_GFp(x, p, a, b, ctx)) { diff --git a/app/openssl/crypto/ec/ecp_mont.c b/app/openssl/crypto/ec/ecp_mont.c index 9fc4a466..f04f132c 100644 --- a/app/openssl/crypto/ec/ecp_mont.c +++ b/app/openssl/crypto/ec/ecp_mont.c @@ -63,12 +63,20 @@ #include +#ifdef OPENSSL_FIPS +#include +#endif + #include "ec_lcl.h" const EC_METHOD *EC_GFp_mont_method(void) { +#ifdef OPENSSL_FIPS + return fips_ec_gfp_mont_method(); +#else static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, NID_X9_62_prime_field, ec_GFp_mont_group_init, ec_GFp_mont_group_finish, @@ -87,9 +95,7 @@ const EC_METHOD *EC_GFp_mont_method(void) ec_GFp_simple_get_Jprojective_coordinates_GFp, ec_GFp_simple_point_set_affine_coordinates, ec_GFp_simple_point_get_affine_coordinates, - ec_GFp_simple_set_compressed_coordinates, - ec_GFp_simple_point2oct, - ec_GFp_simple_oct2point, + 0,0,0, ec_GFp_simple_add, ec_GFp_simple_dbl, ec_GFp_simple_invert, @@ -109,6 +115,7 @@ const EC_METHOD *EC_GFp_mont_method(void) ec_GFp_mont_field_set_to_one }; return &ret; +#endif } diff --git a/app/openssl/crypto/ec/ecp_nist.c b/app/openssl/crypto/ec/ecp_nist.c index 2a5682ea..aad2d5f4 100644 --- a/app/openssl/crypto/ec/ecp_nist.c +++ b/app/openssl/crypto/ec/ecp_nist.c @@ -67,9 +67,17 @@ #include #include "ec_lcl.h" +#ifdef OPENSSL_FIPS +#include +#endif + const EC_METHOD *EC_GFp_nist_method(void) { +#ifdef OPENSSL_FIPS + return fips_ec_gfp_nist_method(); +#else static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, NID_X9_62_prime_field, ec_GFp_simple_group_init, ec_GFp_simple_group_finish, @@ -88,9 +96,7 @@ const EC_METHOD *EC_GFp_nist_method(void) ec_GFp_simple_get_Jprojective_coordinates_GFp, ec_GFp_simple_point_set_affine_coordinates, ec_GFp_simple_point_get_affine_coordinates, - ec_GFp_simple_set_compressed_coordinates, - ec_GFp_simple_point2oct, - ec_GFp_simple_oct2point, + 0,0,0, ec_GFp_simple_add, ec_GFp_simple_dbl, ec_GFp_simple_invert, @@ -110,6 +116,7 @@ const EC_METHOD *EC_GFp_nist_method(void) 0 /* field_set_to_one */ }; return &ret; +#endif } int ec_GFp_nist_group_copy(EC_GROUP *dest, const EC_GROUP *src) diff --git a/app/openssl/crypto/ec/ecp_oct.c b/app/openssl/crypto/ec/ecp_oct.c new file mode 100644 index 00000000..374a0ee7 --- /dev/null +++ b/app/openssl/crypto/ec/ecp_oct.c @@ -0,0 +1,433 @@ +/* crypto/ec/ecp_oct.c */ +/* Includes code written by Lenka Fibikova + * for the OpenSSL project. + * Includes code written by Bodo Moeller for the OpenSSL project. +*/ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * Portions of this software developed by SUN MICROSYSTEMS, INC., + * and contributed to the OpenSSL project. + */ + +#include +#include + +#include "ec_lcl.h" + +int ec_GFp_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point, + const BIGNUM *x_, int y_bit, BN_CTX *ctx) + { + BN_CTX *new_ctx = NULL; + BIGNUM *tmp1, *tmp2, *x, *y; + int ret = 0; + + /* clear error queue*/ + ERR_clear_error(); + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + y_bit = (y_bit != 0); + + BN_CTX_start(ctx); + tmp1 = BN_CTX_get(ctx); + tmp2 = BN_CTX_get(ctx); + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + if (y == NULL) goto err; + + /* Recover y. We have a Weierstrass equation + * y^2 = x^3 + a*x + b, + * so y is one of the square roots of x^3 + a*x + b. + */ + + /* tmp1 := x^3 */ + if (!BN_nnmod(x, x_, &group->field,ctx)) goto err; + if (group->meth->field_decode == 0) + { + /* field_{sqr,mul} work on standard representation */ + if (!group->meth->field_sqr(group, tmp2, x_, ctx)) goto err; + if (!group->meth->field_mul(group, tmp1, tmp2, x_, ctx)) goto err; + } + else + { + if (!BN_mod_sqr(tmp2, x_, &group->field, ctx)) goto err; + if (!BN_mod_mul(tmp1, tmp2, x_, &group->field, ctx)) goto err; + } + + /* tmp1 := tmp1 + a*x */ + if (group->a_is_minus3) + { + if (!BN_mod_lshift1_quick(tmp2, x, &group->field)) goto err; + if (!BN_mod_add_quick(tmp2, tmp2, x, &group->field)) goto err; + if (!BN_mod_sub_quick(tmp1, tmp1, tmp2, &group->field)) goto err; + } + else + { + if (group->meth->field_decode) + { + if (!group->meth->field_decode(group, tmp2, &group->a, ctx)) goto err; + if (!BN_mod_mul(tmp2, tmp2, x, &group->field, ctx)) goto err; + } + else + { + /* field_mul works on standard representation */ + if (!group->meth->field_mul(group, tmp2, &group->a, x, ctx)) goto err; + } + + if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err; + } + + /* tmp1 := tmp1 + b */ + if (group->meth->field_decode) + { + if (!group->meth->field_decode(group, tmp2, &group->b, ctx)) goto err; + if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err; + } + else + { + if (!BN_mod_add_quick(tmp1, tmp1, &group->b, &group->field)) goto err; + } + + if (!BN_mod_sqrt(y, tmp1, &group->field, ctx)) + { + unsigned long err = ERR_peek_last_error(); + + if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NOT_A_SQUARE) + { + ERR_clear_error(); + ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); + } + else + ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB); + goto err; + } + + if (y_bit != BN_is_odd(y)) + { + if (BN_is_zero(y)) + { + int kron; + + kron = BN_kronecker(x, &group->field, ctx); + if (kron == -2) goto err; + + if (kron == 1) + ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSION_BIT); + else + /* BN_mod_sqrt() should have cought this error (not a square) */ + ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); + goto err; + } + if (!BN_usub(y, &group->field, y)) goto err; + } + if (y_bit != BN_is_odd(y)) + { + ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_INTERNAL_ERROR); + goto err; + } + + if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + } + + +size_t ec_GFp_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, + unsigned char *buf, size_t len, BN_CTX *ctx) + { + size_t ret; + BN_CTX *new_ctx = NULL; + int used_ctx = 0; + BIGNUM *x, *y; + size_t field_len, i, skip; + + if ((form != POINT_CONVERSION_COMPRESSED) + && (form != POINT_CONVERSION_UNCOMPRESSED) + && (form != POINT_CONVERSION_HYBRID)) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_INVALID_FORM); + goto err; + } + + if (EC_POINT_is_at_infinity(group, point)) + { + /* encodes to a single 0 octet */ + if (buf != NULL) + { + if (len < 1) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); + return 0; + } + buf[0] = 0; + } + return 1; + } + + + /* ret := required output buffer length */ + field_len = BN_num_bytes(&group->field); + ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; + + /* if 'buf' is NULL, just return required length */ + if (buf != NULL) + { + if (len < ret) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); + goto err; + } + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + BN_CTX_start(ctx); + used_ctx = 1; + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + if (y == NULL) goto err; + + if (!EC_POINT_get_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; + + if ((form == POINT_CONVERSION_COMPRESSED || form == POINT_CONVERSION_HYBRID) && BN_is_odd(y)) + buf[0] = form + 1; + else + buf[0] = form; + + i = 1; + + skip = field_len - BN_num_bytes(x); + if (skip > field_len) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + while (skip > 0) + { + buf[i++] = 0; + skip--; + } + skip = BN_bn2bin(x, buf + i); + i += skip; + if (i != 1 + field_len) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + + if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID) + { + skip = field_len - BN_num_bytes(y); + if (skip > field_len) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + while (skip > 0) + { + buf[i++] = 0; + skip--; + } + skip = BN_bn2bin(y, buf + i); + i += skip; + } + + if (i != ret) + { + ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); + goto err; + } + } + + if (used_ctx) + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + + err: + if (used_ctx) + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return 0; + } + + +int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point, + const unsigned char *buf, size_t len, BN_CTX *ctx) + { + point_conversion_form_t form; + int y_bit; + BN_CTX *new_ctx = NULL; + BIGNUM *x, *y; + size_t field_len, enc_len; + int ret = 0; + + if (len == 0) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL); + return 0; + } + form = buf[0]; + y_bit = form & 1; + form = form & ~1U; + if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED) + && (form != POINT_CONVERSION_UNCOMPRESSED) + && (form != POINT_CONVERSION_HYBRID)) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + if (form == 0) + { + if (len != 1) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + return EC_POINT_set_to_infinity(group, point); + } + + field_len = BN_num_bytes(&group->field); + enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; + + if (len != enc_len) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + return 0; + } + + if (ctx == NULL) + { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + return 0; + } + + BN_CTX_start(ctx); + x = BN_CTX_get(ctx); + y = BN_CTX_get(ctx); + if (y == NULL) goto err; + + if (!BN_bin2bn(buf + 1, field_len, x)) goto err; + if (BN_ucmp(x, &group->field) >= 0) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + + if (form == POINT_CONVERSION_COMPRESSED) + { + if (!EC_POINT_set_compressed_coordinates_GFp(group, point, x, y_bit, ctx)) goto err; + } + else + { + if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err; + if (BN_ucmp(y, &group->field) >= 0) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + if (form == POINT_CONVERSION_HYBRID) + { + if (y_bit != BN_is_odd(y)) + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); + goto err; + } + } + + if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; + } + + if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */ + { + ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); + goto err; + } + + ret = 1; + + err: + BN_CTX_end(ctx); + if (new_ctx != NULL) + BN_CTX_free(new_ctx); + return ret; + } + diff --git a/app/openssl/crypto/ec/ecp_smpl.c b/app/openssl/crypto/ec/ecp_smpl.c index 66a92e2a..7cbb321f 100644 --- a/app/openssl/crypto/ec/ecp_smpl.c +++ b/app/openssl/crypto/ec/ecp_smpl.c @@ -65,11 +65,19 @@ #include #include +#ifdef OPENSSL_FIPS +#include +#endif + #include "ec_lcl.h" const EC_METHOD *EC_GFp_simple_method(void) { +#ifdef OPENSSL_FIPS + return fips_ec_gfp_simple_method(); +#else static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, NID_X9_62_prime_field, ec_GFp_simple_group_init, ec_GFp_simple_group_finish, @@ -88,9 +96,7 @@ const EC_METHOD *EC_GFp_simple_method(void) ec_GFp_simple_get_Jprojective_coordinates_GFp, ec_GFp_simple_point_set_affine_coordinates, ec_GFp_simple_point_get_affine_coordinates, - ec_GFp_simple_set_compressed_coordinates, - ec_GFp_simple_point2oct, - ec_GFp_simple_oct2point, + 0,0,0, ec_GFp_simple_add, ec_GFp_simple_dbl, ec_GFp_simple_invert, @@ -110,6 +116,7 @@ const EC_METHOD *EC_GFp_simple_method(void) 0 /* field_set_to_one */ }; return &ret; +#endif } @@ -633,372 +640,6 @@ int ec_GFp_simple_point_get_affine_coordinates(const EC_GROUP *group, const EC_P return ret; } - -int ec_GFp_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point, - const BIGNUM *x_, int y_bit, BN_CTX *ctx) - { - BN_CTX *new_ctx = NULL; - BIGNUM *tmp1, *tmp2, *x, *y; - int ret = 0; - - /* clear error queue*/ - ERR_clear_error(); - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - y_bit = (y_bit != 0); - - BN_CTX_start(ctx); - tmp1 = BN_CTX_get(ctx); - tmp2 = BN_CTX_get(ctx); - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - if (y == NULL) goto err; - - /* Recover y. We have a Weierstrass equation - * y^2 = x^3 + a*x + b, - * so y is one of the square roots of x^3 + a*x + b. - */ - - /* tmp1 := x^3 */ - if (!BN_nnmod(x, x_, &group->field,ctx)) goto err; - if (group->meth->field_decode == 0) - { - /* field_{sqr,mul} work on standard representation */ - if (!group->meth->field_sqr(group, tmp2, x_, ctx)) goto err; - if (!group->meth->field_mul(group, tmp1, tmp2, x_, ctx)) goto err; - } - else - { - if (!BN_mod_sqr(tmp2, x_, &group->field, ctx)) goto err; - if (!BN_mod_mul(tmp1, tmp2, x_, &group->field, ctx)) goto err; - } - - /* tmp1 := tmp1 + a*x */ - if (group->a_is_minus3) - { - if (!BN_mod_lshift1_quick(tmp2, x, &group->field)) goto err; - if (!BN_mod_add_quick(tmp2, tmp2, x, &group->field)) goto err; - if (!BN_mod_sub_quick(tmp1, tmp1, tmp2, &group->field)) goto err; - } - else - { - if (group->meth->field_decode) - { - if (!group->meth->field_decode(group, tmp2, &group->a, ctx)) goto err; - if (!BN_mod_mul(tmp2, tmp2, x, &group->field, ctx)) goto err; - } - else - { - /* field_mul works on standard representation */ - if (!group->meth->field_mul(group, tmp2, &group->a, x, ctx)) goto err; - } - - if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err; - } - - /* tmp1 := tmp1 + b */ - if (group->meth->field_decode) - { - if (!group->meth->field_decode(group, tmp2, &group->b, ctx)) goto err; - if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err; - } - else - { - if (!BN_mod_add_quick(tmp1, tmp1, &group->b, &group->field)) goto err; - } - - if (!BN_mod_sqrt(y, tmp1, &group->field, ctx)) - { - unsigned long err = ERR_peek_last_error(); - - if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NOT_A_SQUARE) - { - ERR_clear_error(); - ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); - } - else - ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB); - goto err; - } - - if (y_bit != BN_is_odd(y)) - { - if (BN_is_zero(y)) - { - int kron; - - kron = BN_kronecker(x, &group->field, ctx); - if (kron == -2) goto err; - - if (kron == 1) - ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSION_BIT); - else - /* BN_mod_sqrt() should have cought this error (not a square) */ - ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT); - goto err; - } - if (!BN_usub(y, &group->field, y)) goto err; - } - if (y_bit != BN_is_odd(y)) - { - ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_INTERNAL_ERROR); - goto err; - } - - if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; - - ret = 1; - - err: - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - } - - -size_t ec_GFp_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form, - unsigned char *buf, size_t len, BN_CTX *ctx) - { - size_t ret; - BN_CTX *new_ctx = NULL; - int used_ctx = 0; - BIGNUM *x, *y; - size_t field_len, i, skip; - - if ((form != POINT_CONVERSION_COMPRESSED) - && (form != POINT_CONVERSION_UNCOMPRESSED) - && (form != POINT_CONVERSION_HYBRID)) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_INVALID_FORM); - goto err; - } - - if (EC_POINT_is_at_infinity(group, point)) - { - /* encodes to a single 0 octet */ - if (buf != NULL) - { - if (len < 1) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); - return 0; - } - buf[0] = 0; - } - return 1; - } - - - /* ret := required output buffer length */ - field_len = BN_num_bytes(&group->field); - ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; - - /* if 'buf' is NULL, just return required length */ - if (buf != NULL) - { - if (len < ret) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL); - goto err; - } - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - BN_CTX_start(ctx); - used_ctx = 1; - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - if (y == NULL) goto err; - - if (!EC_POINT_get_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; - - if ((form == POINT_CONVERSION_COMPRESSED || form == POINT_CONVERSION_HYBRID) && BN_is_odd(y)) - buf[0] = form + 1; - else - buf[0] = form; - - i = 1; - - skip = field_len - BN_num_bytes(x); - if (skip > field_len) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - while (skip > 0) - { - buf[i++] = 0; - skip--; - } - skip = BN_bn2bin(x, buf + i); - i += skip; - if (i != 1 + field_len) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - - if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID) - { - skip = field_len - BN_num_bytes(y); - if (skip > field_len) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - while (skip > 0) - { - buf[i++] = 0; - skip--; - } - skip = BN_bn2bin(y, buf + i); - i += skip; - } - - if (i != ret) - { - ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR); - goto err; - } - } - - if (used_ctx) - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - - err: - if (used_ctx) - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return 0; - } - - -int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point, - const unsigned char *buf, size_t len, BN_CTX *ctx) - { - point_conversion_form_t form; - int y_bit; - BN_CTX *new_ctx = NULL; - BIGNUM *x, *y; - size_t field_len, enc_len; - int ret = 0; - - if (len == 0) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL); - return 0; - } - form = buf[0]; - y_bit = form & 1; - form = form & ~1U; - if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED) - && (form != POINT_CONVERSION_UNCOMPRESSED) - && (form != POINT_CONVERSION_HYBRID)) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - if (form == 0) - { - if (len != 1) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - return EC_POINT_set_to_infinity(group, point); - } - - field_len = BN_num_bytes(&group->field); - enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len; - - if (len != enc_len) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - return 0; - } - - if (ctx == NULL) - { - ctx = new_ctx = BN_CTX_new(); - if (ctx == NULL) - return 0; - } - - BN_CTX_start(ctx); - x = BN_CTX_get(ctx); - y = BN_CTX_get(ctx); - if (y == NULL) goto err; - - if (!BN_bin2bn(buf + 1, field_len, x)) goto err; - if (BN_ucmp(x, &group->field) >= 0) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - - if (form == POINT_CONVERSION_COMPRESSED) - { - if (!EC_POINT_set_compressed_coordinates_GFp(group, point, x, y_bit, ctx)) goto err; - } - else - { - if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err; - if (BN_ucmp(y, &group->field) >= 0) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - if (form == POINT_CONVERSION_HYBRID) - { - if (y_bit != BN_is_odd(y)) - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING); - goto err; - } - } - - if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err; - } - - if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */ - { - ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); - goto err; - } - - ret = 1; - - err: - BN_CTX_end(ctx); - if (new_ctx != NULL) - BN_CTX_free(new_ctx); - return ret; - } - - int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx) { int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *, BN_CTX *); diff --git a/app/openssl/crypto/ec/ectest.c b/app/openssl/crypto/ec/ectest.c index 7509cb9c..102eaa9b 100644 --- a/app/openssl/crypto/ec/ectest.c +++ b/app/openssl/crypto/ec/ectest.c @@ -94,6 +94,7 @@ int main(int argc, char * argv[]) { puts("Elliptic curves are disabled."); retur #include #include #include +#include #if defined(_MSC_VER) && defined(_MIPS_) && (_MSC_VER/100==12) /* suppress "too big too optimize" warning */ @@ -107,10 +108,6 @@ int main(int argc, char * argv[]) { puts("Elliptic curves are disabled."); retur EXIT(1); \ } while (0) -void prime_field_tests(void); -void char2_field_tests(void); -void internal_curve_test(void); - #define TIMING_BASE_PT 0 #define TIMING_RAND_PT 1 #define TIMING_SIMUL 2 @@ -195,8 +192,51 @@ static void timings(EC_GROUP *group, int type, BN_CTX *ctx) } #endif -void prime_field_tests() - { +/* test multiplication with group order, long and negative scalars */ +static void group_order_tests(EC_GROUP *group) + { + BIGNUM *n1, *n2, *order; + EC_POINT *P = EC_POINT_new(group); + EC_POINT *Q = EC_POINT_new(group); + BN_CTX *ctx = BN_CTX_new(); + + n1 = BN_new(); n2 = BN_new(); order = BN_new(); + fprintf(stdout, "verify group order ..."); + fflush(stdout); + if (!EC_GROUP_get_order(group, order, ctx)) ABORT; + if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) ABORT; + if (!EC_POINT_is_at_infinity(group, Q)) ABORT; + fprintf(stdout, "."); + fflush(stdout); + if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; + if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) ABORT; + if (!EC_POINT_is_at_infinity(group, Q)) ABORT; + fprintf(stdout, " ok\n"); + fprintf(stdout, "long/negative scalar tests ... "); + if (!BN_one(n1)) ABORT; + /* n1 = 1 - order */ + if (!BN_sub(n1, n1, order)) ABORT; + if(!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) ABORT; + if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; + /* n2 = 1 + order */ + if (!BN_add(n2, order, BN_value_one())) ABORT; + if(!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; + if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; + /* n2 = (1 - order) * (1 + order) */ + if (!BN_mul(n2, n1, n2, ctx)) ABORT; + if(!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; + if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; + fprintf(stdout, "ok\n"); + EC_POINT_free(P); + EC_POINT_free(Q); + BN_free(n1); + BN_free(n2); + BN_free(order); + BN_CTX_free(ctx); + } + +static void prime_field_tests(void) + { BN_CTX *ctx = NULL; BIGNUM *p, *a, *b; EC_GROUP *group; @@ -321,21 +361,21 @@ void prime_field_tests() if (len == 0) ABORT; if (!EC_POINT_oct2point(group, P, buf, len, ctx)) ABORT; if (0 != EC_POINT_cmp(group, P, Q, ctx)) ABORT; - fprintf(stdout, "Generator as octect string, compressed form:\n "); + fprintf(stdout, "Generator as octet string, compressed form:\n "); for (i = 0; i < len; i++) fprintf(stdout, "%02X", buf[i]); len = EC_POINT_point2oct(group, Q, POINT_CONVERSION_UNCOMPRESSED, buf, sizeof buf, ctx); if (len == 0) ABORT; if (!EC_POINT_oct2point(group, P, buf, len, ctx)) ABORT; if (0 != EC_POINT_cmp(group, P, Q, ctx)) ABORT; - fprintf(stdout, "\nGenerator as octect string, uncompressed form:\n "); + fprintf(stdout, "\nGenerator as octet string, uncompressed form:\n "); for (i = 0; i < len; i++) fprintf(stdout, "%02X", buf[i]); len = EC_POINT_point2oct(group, Q, POINT_CONVERSION_HYBRID, buf, sizeof buf, ctx); if (len == 0) ABORT; if (!EC_POINT_oct2point(group, P, buf, len, ctx)) ABORT; if (0 != EC_POINT_cmp(group, P, Q, ctx)) ABORT; - fprintf(stdout, "\nGenerator as octect string, hybrid form:\n "); + fprintf(stdout, "\nGenerator as octet string, hybrid form:\n "); for (i = 0; i < len; i++) fprintf(stdout, "%02X", buf[i]); if (!EC_POINT_get_Jprojective_coordinates_GFp(group, R, x, y, z, ctx)) ABORT; @@ -381,17 +421,7 @@ void prime_field_tests() if (EC_GROUP_get_degree(group) != 160) ABORT; fprintf(stdout, " ok\n"); - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + group_order_tests(group); if (!(P_160 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_160, group)) ABORT; @@ -425,17 +455,7 @@ void prime_field_tests() if (EC_GROUP_get_degree(group) != 192) ABORT; fprintf(stdout, " ok\n"); - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + group_order_tests(group); if (!(P_192 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_192, group)) ABORT; @@ -469,17 +489,7 @@ void prime_field_tests() if (EC_GROUP_get_degree(group) != 224) ABORT; fprintf(stdout, " ok\n"); - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + group_order_tests(group); if (!(P_224 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_224, group)) ABORT; @@ -514,17 +524,7 @@ void prime_field_tests() if (EC_GROUP_get_degree(group) != 256) ABORT; fprintf(stdout, " ok\n"); - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + group_order_tests(group); if (!(P_256 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_256, group)) ABORT; @@ -563,18 +563,8 @@ void prime_field_tests() fprintf(stdout, "verify degree ..."); if (EC_GROUP_get_degree(group) != 384) ABORT; fprintf(stdout, " ok\n"); - - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + + group_order_tests(group); if (!(P_384 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_384, group)) ABORT; @@ -619,18 +609,8 @@ void prime_field_tests() fprintf(stdout, "verify degree ..."); if (EC_GROUP_get_degree(group) != 521) ABORT; fprintf(stdout, " ok\n"); - - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; - fprintf(stdout, " ok\n"); + + group_order_tests(group); if (!(P_521 = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; if (!EC_GROUP_copy(P_521, group)) ABORT; @@ -659,6 +639,7 @@ void prime_field_tests() points[2] = Q; points[3] = Q; + if (!EC_GROUP_get_order(group, z, ctx)) ABORT; if (!BN_add(y, z, BN_value_one())) ABORT; if (BN_is_odd(y)) ABORT; if (!BN_rshift1(y, y)) ABORT; @@ -792,22 +773,14 @@ void prime_field_tests() fprintf(stdout, "verify degree ..."); \ if (EC_GROUP_get_degree(group) != _degree) ABORT; \ fprintf(stdout, " ok\n"); \ - fprintf(stdout, "verify group order ..."); \ - fflush(stdout); \ - if (!EC_GROUP_get_order(group, z, ctx)) ABORT; \ - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; \ - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; \ - fprintf(stdout, "."); \ - fflush(stdout); \ - if (!EC_GROUP_precompute_mult(group, ctx)) ABORT; \ - if (!EC_POINT_mul(group, Q, z, NULL, NULL, ctx)) ABORT; \ - if (!EC_POINT_is_at_infinity(group, Q)) ABORT; \ - fprintf(stdout, " ok\n"); \ + group_order_tests(group); \ if (!(_variable = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; \ - if (!EC_GROUP_copy(_variable, group)) ABORT; + if (!EC_GROUP_copy(_variable, group)) ABORT; \ -void char2_field_tests() - { +#ifndef OPENSSL_NO_EC2M + +static void char2_field_tests(void) + { BN_CTX *ctx = NULL; BIGNUM *p, *a, *b; EC_GROUP *group; @@ -1239,8 +1212,9 @@ void char2_field_tests() if (C2_B571) EC_GROUP_free(C2_B571); } +#endif -void internal_curve_test(void) +static void internal_curve_test(void) { EC_builtin_curve *curves = NULL; size_t crv_len = 0, n = 0; @@ -1287,13 +1261,189 @@ void internal_curve_test(void) EC_GROUP_free(group); } if (ok) - fprintf(stdout, " ok\n"); + fprintf(stdout, " ok\n\n"); else - fprintf(stdout, " failed\n"); + { + fprintf(stdout, " failed\n\n"); + ABORT; + } OPENSSL_free(curves); return; } +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +/* nistp_test_params contains magic numbers for testing our optimized + * implementations of several NIST curves with characteristic > 3. */ +struct nistp_test_params + { + const EC_METHOD* (*meth) (); + int degree; + /* Qx, Qy and D are taken from + * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/ECDSA_Prime.pdf + * Otherwise, values are standard curve parameters from FIPS 180-3 */ + const char *p, *a, *b, *Qx, *Qy, *Gx, *Gy, *order, *d; + }; + +static const struct nistp_test_params nistp_tests_params[] = + { + { + /* P-224 */ + EC_GFp_nistp224_method, + 224, + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", /* p */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", /* a */ + "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4", /* b */ + "E84FB0B8E7000CB657D7973CF6B42ED78B301674276DF744AF130B3E", /* Qx */ + "4376675C6FC5612C21A0FF2D2A89D2987DF7A2BC52183B5982298555", /* Qy */ + "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21", /* Gx */ + "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34", /* Gy */ + "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D", /* order */ + "3F0C488E987C80BE0FEE521F8D90BE6034EC69AE11CA72AA777481E8", /* d */ + }, + { + /* P-256 */ + EC_GFp_nistp256_method, + 256, + "ffffffff00000001000000000000000000000000ffffffffffffffffffffffff", /* p */ + "ffffffff00000001000000000000000000000000fffffffffffffffffffffffc", /* a */ + "5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", /* b */ + "b7e08afdfe94bad3f1dc8c734798ba1c62b3a0ad1e9ea2a38201cd0889bc7a19", /* Qx */ + "3603f747959dbf7a4bb226e41928729063adc7ae43529e61b563bbc606cc5e09", /* Qy */ + "6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", /* Gx */ + "4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", /* Gy */ + "ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", /* order */ + "c477f9f65c22cce20657faa5b2d1d8122336f851a508a1ed04e479c34985bf96", /* d */ + }, + { + /* P-521 */ + EC_GFp_nistp521_method, + 521, + "1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", /* p */ + "1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc", /* a */ + "051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", /* b */ + "0098e91eef9a68452822309c52fab453f5f117c1da8ed796b255e9ab8f6410cca16e59df403a6bdc6ca467a37056b1e54b3005d8ac030decfeb68df18b171885d5c4", /* Qx */ + "0164350c321aecfc1cca1ba4364c9b15656150b4b78d6a48d7d28e7f31985ef17be8554376b72900712c4b83ad668327231526e313f5f092999a4632fd50d946bc2e", /* Qy */ + "c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", /* Gx */ + "11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", /* Gy */ + "1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", /* order */ + "0100085f47b8e1b8b11b7eb33028c0b2888e304bfc98501955b45bba1478dc184eeedf09b86a5f7c21994406072787205e69a63709fe35aa93ba333514b24f961722", /* d */ + }, + }; + +void nistp_single_test(const struct nistp_test_params *test) + { + BN_CTX *ctx; + BIGNUM *p, *a, *b, *x, *y, *n, *m, *order; + EC_GROUP *NISTP; + EC_POINT *G, *P, *Q, *Q_CHECK; + + fprintf(stdout, "\nNIST curve P-%d (optimised implementation):\n", test->degree); + ctx = BN_CTX_new(); + p = BN_new(); + a = BN_new(); + b = BN_new(); + x = BN_new(); y = BN_new(); + m = BN_new(); n = BN_new(); order = BN_new(); + + NISTP = EC_GROUP_new(test->meth()); + if(!NISTP) ABORT; + if (!BN_hex2bn(&p, test->p)) ABORT; + if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) ABORT; + if (!BN_hex2bn(&a, test->a)) ABORT; + if (!BN_hex2bn(&b, test->b)) ABORT; + if (!EC_GROUP_set_curve_GFp(NISTP, p, a, b, ctx)) ABORT; + G = EC_POINT_new(NISTP); + P = EC_POINT_new(NISTP); + Q = EC_POINT_new(NISTP); + Q_CHECK = EC_POINT_new(NISTP); + if(!BN_hex2bn(&x, test->Qx)) ABORT; + if(!BN_hex2bn(&y, test->Qy)) ABORT; + if(!EC_POINT_set_affine_coordinates_GFp(NISTP, Q_CHECK, x, y, ctx)) ABORT; + if (!BN_hex2bn(&x, test->Gx)) ABORT; + if (!BN_hex2bn(&y, test->Gy)) ABORT; + if (!EC_POINT_set_affine_coordinates_GFp(NISTP, G, x, y, ctx)) ABORT; + if (!BN_hex2bn(&order, test->order)) ABORT; + if (!EC_GROUP_set_generator(NISTP, G, order, BN_value_one())) ABORT; + + fprintf(stdout, "verify degree ... "); + if (EC_GROUP_get_degree(NISTP) != test->degree) ABORT; + fprintf(stdout, "ok\n"); + + fprintf(stdout, "NIST test vectors ... "); + if (!BN_hex2bn(&n, test->d)) ABORT; + /* fixed point multiplication */ + EC_POINT_mul(NISTP, Q, n, NULL, NULL, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + /* random point multiplication */ + EC_POINT_mul(NISTP, Q, NULL, G, n, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + + /* set generator to P = 2*G, where G is the standard generator */ + if (!EC_POINT_dbl(NISTP, P, G, ctx)) ABORT; + if (!EC_GROUP_set_generator(NISTP, P, order, BN_value_one())) ABORT; + /* set the scalar to m=n/2, where n is the NIST test scalar */ + if (!BN_rshift(m, n, 1)) ABORT; + + /* test the non-standard generator */ + /* fixed point multiplication */ + EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + /* random point multiplication */ + EC_POINT_mul(NISTP, Q, NULL, P, m, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + + /* now repeat all tests with precomputation */ + if (!EC_GROUP_precompute_mult(NISTP, ctx)) ABORT; + + /* fixed point multiplication */ + EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + /* random point multiplication */ + EC_POINT_mul(NISTP, Q, NULL, P, m, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + + /* reset generator */ + if (!EC_GROUP_set_generator(NISTP, G, order, BN_value_one())) ABORT; + /* fixed point multiplication */ + EC_POINT_mul(NISTP, Q, n, NULL, NULL, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + /* random point multiplication */ + EC_POINT_mul(NISTP, Q, NULL, G, n, ctx); + if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) ABORT; + + fprintf(stdout, "ok\n"); + group_order_tests(NISTP); +#if 0 + timings(NISTP, TIMING_BASE_PT, ctx); + timings(NISTP, TIMING_RAND_PT, ctx); +#endif + EC_GROUP_free(NISTP); + EC_POINT_free(G); + EC_POINT_free(P); + EC_POINT_free(Q); + EC_POINT_free(Q_CHECK); + BN_free(n); + BN_free(m); + BN_free(p); + BN_free(a); + BN_free(b); + BN_free(x); + BN_free(y); + BN_free(order); + BN_CTX_free(ctx); + } + +void nistp_tests() + { + unsigned i; + + for (i = 0; i < sizeof(nistp_tests_params) / sizeof(struct nistp_test_params); i++) + { + nistp_single_test(&nistp_tests_params[i]); + } + } +#endif + static const char rnd_seed[] = "string to make the random number generator think it has entropy"; int main(int argc, char *argv[]) @@ -1317,7 +1467,12 @@ int main(int argc, char *argv[]) prime_field_tests(); puts(""); +#ifndef OPENSSL_NO_EC2M char2_field_tests(); +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 + nistp_tests(); +#endif /* test the internal curves */ internal_curve_test(); diff --git a/app/openssl/crypto/ecdh/ecdh.h b/app/openssl/crypto/ecdh/ecdh.h index b4b58ee6..8887102c 100644 --- a/app/openssl/crypto/ecdh/ecdh.h +++ b/app/openssl/crypto/ecdh/ecdh.h @@ -109,11 +109,13 @@ void ERR_load_ECDH_strings(void); /* Error codes for the ECDH functions. */ /* Function codes. */ +#define ECDH_F_ECDH_CHECK 102 #define ECDH_F_ECDH_COMPUTE_KEY 100 #define ECDH_F_ECDH_DATA_NEW_METHOD 101 /* Reason codes. */ #define ECDH_R_KDF_FAILED 102 +#define ECDH_R_NON_FIPS_METHOD 103 #define ECDH_R_NO_PRIVATE_VALUE 100 #define ECDH_R_POINT_ARITHMETIC_FAILURE 101 diff --git a/app/openssl/crypto/ecdh/ecdhtest.c b/app/openssl/crypto/ecdh/ecdhtest.c index 212a87ef..823d7baa 100644 --- a/app/openssl/crypto/ecdh/ecdhtest.c +++ b/app/openssl/crypto/ecdh/ecdhtest.c @@ -158,11 +158,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out) if (!EC_POINT_get_affine_coordinates_GFp(group, EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; } +#ifndef OPENSSL_NO_EC2M else { if (!EC_POINT_get_affine_coordinates_GF2m(group, EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; } +#endif #ifdef NOISY BIO_puts(out," pri 1="); BN_print(out,a->priv_key); @@ -183,11 +185,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out) if (!EC_POINT_get_affine_coordinates_GFp(group, EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; } +#ifndef OPENSSL_NO_EC2M else { if (!EC_POINT_get_affine_coordinates_GF2m(group, EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; } +#endif #ifdef NOISY BIO_puts(out," pri 2="); @@ -324,6 +328,7 @@ int main(int argc, char *argv[]) if (!test_ecdh_curve(NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) goto err; if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) goto err; if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) goto err; +#ifndef OPENSSL_NO_EC2M /* NIST BINARY CURVES TESTS */ if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) goto err; if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) goto err; @@ -335,6 +340,7 @@ int main(int argc, char *argv[]) if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) goto err; if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) goto err; if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err; +#endif ret = 0; diff --git a/app/openssl/crypto/ecdh/ech_err.c b/app/openssl/crypto/ecdh/ech_err.c index 6f4b0c99..3bd24739 100644 --- a/app/openssl/crypto/ecdh/ech_err.c +++ b/app/openssl/crypto/ecdh/ech_err.c @@ -1,6 +1,6 @@ /* crypto/ecdh/ech_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,6 +70,7 @@ static ERR_STRING_DATA ECDH_str_functs[]= { +{ERR_FUNC(ECDH_F_ECDH_CHECK), "ECDH_CHECK"}, {ERR_FUNC(ECDH_F_ECDH_COMPUTE_KEY), "ECDH_compute_key"}, {ERR_FUNC(ECDH_F_ECDH_DATA_NEW_METHOD), "ECDH_DATA_new_method"}, {0,NULL} @@ -78,6 +79,7 @@ static ERR_STRING_DATA ECDH_str_functs[]= static ERR_STRING_DATA ECDH_str_reasons[]= { {ERR_REASON(ECDH_R_KDF_FAILED) ,"KDF failed"}, +{ERR_REASON(ECDH_R_NON_FIPS_METHOD) ,"non fips method"}, {ERR_REASON(ECDH_R_NO_PRIVATE_VALUE) ,"no private value"}, {ERR_REASON(ECDH_R_POINT_ARITHMETIC_FAILURE),"point arithmetic failure"}, {0,NULL} diff --git a/app/openssl/crypto/ecdh/ech_key.c b/app/openssl/crypto/ecdh/ech_key.c index f44da929..2988899e 100644 --- a/app/openssl/crypto/ecdh/ech_key.c +++ b/app/openssl/crypto/ecdh/ech_key.c @@ -68,9 +68,6 @@ */ #include "ech_locl.h" -#ifndef OPENSSL_NO_ENGINE -#include -#endif int ECDH_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, EC_KEY *eckey, diff --git a/app/openssl/crypto/ecdh/ech_lib.c b/app/openssl/crypto/ecdh/ech_lib.c index 4d8ea03d..0644431b 100644 --- a/app/openssl/crypto/ecdh/ech_lib.c +++ b/app/openssl/crypto/ecdh/ech_lib.c @@ -73,6 +73,9 @@ #include #endif #include +#ifdef OPENSSL_FIPS +#include +#endif const char ECDH_version[]="ECDH" OPENSSL_VERSION_PTEXT; @@ -90,7 +93,16 @@ void ECDH_set_default_method(const ECDH_METHOD *meth) const ECDH_METHOD *ECDH_get_default_method(void) { if(!default_ECDH_method) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ecdh_openssl(); + else + return ECDH_OpenSSL(); +#else default_ECDH_method = ECDH_OpenSSL(); +#endif + } return default_ECDH_method; } @@ -210,11 +222,26 @@ ECDH_DATA *ecdh_check(EC_KEY *key) ecdh_data = (ECDH_DATA *)ecdh_data_new(); if (ecdh_data == NULL) return NULL; - EC_KEY_insert_key_method_data(key, (void *)ecdh_data, - ecdh_data_dup, ecdh_data_free, ecdh_data_free); + data = EC_KEY_insert_key_method_data(key, (void *)ecdh_data, + ecdh_data_dup, ecdh_data_free, ecdh_data_free); + if (data != NULL) + { + /* Another thread raced us to install the key_method + * data and won. */ + ecdh_data_free(ecdh_data); + ecdh_data = (ECDH_DATA *)data; + } } else ecdh_data = (ECDH_DATA *)data; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(ecdh_data->flags & ECDH_FLAG_FIPS_METHOD) + && !(EC_KEY_get_flags(key) & EC_FLAG_NON_FIPS_ALLOW)) + { + ECDHerr(ECDH_F_ECDH_CHECK, ECDH_R_NON_FIPS_METHOD); + return NULL; + } +#endif return ecdh_data; diff --git a/app/openssl/crypto/ecdh/ech_locl.h b/app/openssl/crypto/ecdh/ech_locl.h index f658526a..f6cad6a8 100644 --- a/app/openssl/crypto/ecdh/ech_locl.h +++ b/app/openssl/crypto/ecdh/ech_locl.h @@ -75,6 +75,14 @@ struct ecdh_method char *app_data; }; +/* If this flag is set the ECDH method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its responsibility + * to ensure the result is compliant. + */ + +#define ECDH_FLAG_FIPS_METHOD 0x1 + typedef struct ecdh_data_st { /* EC_KEY_METH_DATA part */ int (*init)(EC_KEY *); diff --git a/app/openssl/crypto/ecdh/ech_ossl.c b/app/openssl/crypto/ecdh/ech_ossl.c index 2a40ff12..4a30628f 100644 --- a/app/openssl/crypto/ecdh/ech_ossl.c +++ b/app/openssl/crypto/ecdh/ech_ossl.c @@ -157,6 +157,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, goto err; } } +#ifndef OPENSSL_NO_EC2M else { if (!EC_POINT_get_affine_coordinates_GF2m(group, tmp, x, y, ctx)) @@ -165,6 +166,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, goto err; } } +#endif buflen = (EC_GROUP_get_degree(group) + 7)/8; len = BN_num_bytes(x); diff --git a/app/openssl/crypto/ecdsa/ecdsa.h b/app/openssl/crypto/ecdsa/ecdsa.h index e61c5398..dc6a36b1 100644 --- a/app/openssl/crypto/ecdsa/ecdsa.h +++ b/app/openssl/crypto/ecdsa/ecdsa.h @@ -238,6 +238,7 @@ void ERR_load_ECDSA_strings(void); /* Error codes for the ECDSA functions. */ /* Function codes. */ +#define ECDSA_F_ECDSA_CHECK 104 #define ECDSA_F_ECDSA_DATA_NEW_METHOD 100 #define ECDSA_F_ECDSA_DO_SIGN 101 #define ECDSA_F_ECDSA_DO_VERIFY 102 @@ -249,6 +250,8 @@ void ERR_load_ECDSA_strings(void); #define ECDSA_R_ERR_EC_LIB 102 #define ECDSA_R_MISSING_PARAMETERS 103 #define ECDSA_R_NEED_NEW_SETUP_VALUES 106 +#define ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED 108 +#define ECDSA_R_NON_FIPS_METHOD 107 #define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104 #define ECDSA_R_SIGNATURE_MALLOC_FAILED 105 diff --git a/app/openssl/crypto/ecdsa/ecdsatest.c b/app/openssl/crypto/ecdsa/ecdsatest.c index 26a4a9ee..537bb303 100644 --- a/app/openssl/crypto/ecdsa/ecdsatest.c +++ b/app/openssl/crypto/ecdsa/ecdsatest.c @@ -262,6 +262,7 @@ int x9_62_tests(BIO *out) "3238135532097973577080787768312505059318910517550078427819" "78505179448783")) goto x962_err; +#ifndef OPENSSL_NO_EC2M if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1, "87194383164871543355722284926904419997237591535066528048", "308992691965804947361541664549085895292153777025772063598")) @@ -272,7 +273,7 @@ int x9_62_tests(BIO *out) "1970303740007316867383349976549972270528498040721988191026" "49413465737174")) goto x962_err; - +#endif ret = 1; x962_err: if (!restore_rand()) @@ -286,9 +287,13 @@ int test_builtin(BIO *out) size_t crv_len = 0, n = 0; EC_KEY *eckey = NULL, *wrong_eckey = NULL; EC_GROUP *group; + ECDSA_SIG *ecdsa_sig = NULL; unsigned char digest[20], wrong_digest[20]; - unsigned char *signature = NULL; - unsigned int sig_len; + unsigned char *signature = NULL; + const unsigned char *sig_ptr; + unsigned char *sig_ptr2; + unsigned char *raw_buf = NULL; + unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len; int nid, ret = 0; /* fill digest values with some random data */ @@ -338,7 +343,8 @@ int test_builtin(BIO *out) if (EC_KEY_set_group(eckey, group) == 0) goto builtin_err; EC_GROUP_free(group); - if (EC_GROUP_get_degree(EC_KEY_get0_group(eckey)) < 160) + degree = EC_GROUP_get_degree(EC_KEY_get0_group(eckey)); + if (degree < 160) /* drop the curve */ { EC_KEY_free(eckey); @@ -414,26 +420,89 @@ int test_builtin(BIO *out) } BIO_printf(out, "."); (void)BIO_flush(out); - /* modify a single byte of the signature */ - offset = signature[10] % sig_len; - dirt = signature[11]; - signature[offset] ^= dirt ? dirt : 1; + /* wrong length */ + if (ECDSA_verify(0, digest, 20, signature, sig_len - 1, + eckey) == 1) + { + BIO_printf(out, " failed\n"); + goto builtin_err; + } + BIO_printf(out, "."); + (void)BIO_flush(out); + + /* Modify a single byte of the signature: to ensure we don't + * garble the ASN1 structure, we read the raw signature and + * modify a byte in one of the bignums directly. */ + sig_ptr = signature; + if ((ecdsa_sig = d2i_ECDSA_SIG(NULL, &sig_ptr, sig_len)) == NULL) + { + BIO_printf(out, " failed\n"); + goto builtin_err; + } + + /* Store the two BIGNUMs in raw_buf. */ + r_len = BN_num_bytes(ecdsa_sig->r); + s_len = BN_num_bytes(ecdsa_sig->s); + bn_len = (degree + 7) / 8; + if ((r_len > bn_len) || (s_len > bn_len)) + { + BIO_printf(out, " failed\n"); + goto builtin_err; + } + buf_len = 2 * bn_len; + if ((raw_buf = OPENSSL_malloc(buf_len)) == NULL) + goto builtin_err; + /* Pad the bignums with leading zeroes. */ + memset(raw_buf, 0, buf_len); + BN_bn2bin(ecdsa_sig->r, raw_buf + bn_len - r_len); + BN_bn2bin(ecdsa_sig->s, raw_buf + buf_len - s_len); + + /* Modify a single byte in the buffer. */ + offset = raw_buf[10] % buf_len; + dirt = raw_buf[11] ? raw_buf[11] : 1; + raw_buf[offset] ^= dirt; + /* Now read the BIGNUMs back in from raw_buf. */ + if ((BN_bin2bn(raw_buf, bn_len, ecdsa_sig->r) == NULL) || + (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) + goto builtin_err; + + sig_ptr2 = signature; + sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2); if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1) { BIO_printf(out, " failed\n"); goto builtin_err; } + /* Sanity check: undo the modification and verify signature. */ + raw_buf[offset] ^= dirt; + if ((BN_bin2bn(raw_buf, bn_len, ecdsa_sig->r) == NULL) || + (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) + goto builtin_err; + + sig_ptr2 = signature; + sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2); + if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1) + { + BIO_printf(out, " failed\n"); + goto builtin_err; + } BIO_printf(out, "."); (void)BIO_flush(out); BIO_printf(out, " ok\n"); /* cleanup */ + /* clean bogus errors */ + ERR_clear_error(); OPENSSL_free(signature); signature = NULL; EC_KEY_free(eckey); eckey = NULL; EC_KEY_free(wrong_eckey); wrong_eckey = NULL; + ECDSA_SIG_free(ecdsa_sig); + ecdsa_sig = NULL; + OPENSSL_free(raw_buf); + raw_buf = NULL; } ret = 1; @@ -442,8 +511,12 @@ builtin_err: EC_KEY_free(eckey); if (wrong_eckey) EC_KEY_free(wrong_eckey); + if (ecdsa_sig) + ECDSA_SIG_free(ecdsa_sig); if (signature) OPENSSL_free(signature); + if (raw_buf) + OPENSSL_free(raw_buf); if (curves) OPENSSL_free(curves); diff --git a/app/openssl/crypto/ecdsa/ecs_err.c b/app/openssl/crypto/ecdsa/ecs_err.c index 98e38d53..7406c6d8 100644 --- a/app/openssl/crypto/ecdsa/ecs_err.c +++ b/app/openssl/crypto/ecdsa/ecs_err.c @@ -1,6 +1,6 @@ /* crypto/ecdsa/ecs_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,6 +70,7 @@ static ERR_STRING_DATA ECDSA_str_functs[]= { +{ERR_FUNC(ECDSA_F_ECDSA_CHECK), "ECDSA_CHECK"}, {ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"}, @@ -84,6 +85,8 @@ static ERR_STRING_DATA ECDSA_str_reasons[]= {ERR_REASON(ECDSA_R_ERR_EC_LIB) ,"err ec lib"}, {ERR_REASON(ECDSA_R_MISSING_PARAMETERS) ,"missing parameters"}, {ERR_REASON(ECDSA_R_NEED_NEW_SETUP_VALUES),"need new setup values"}, +{ERR_REASON(ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"}, +{ERR_REASON(ECDSA_R_NON_FIPS_METHOD) ,"non fips method"}, {ERR_REASON(ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED),"random number generation failed"}, {ERR_REASON(ECDSA_R_SIGNATURE_MALLOC_FAILED),"signature malloc failed"}, {0,NULL} diff --git a/app/openssl/crypto/ecdsa/ecs_lib.c b/app/openssl/crypto/ecdsa/ecs_lib.c index 2ebae3aa..814a6bf4 100644 --- a/app/openssl/crypto/ecdsa/ecs_lib.c +++ b/app/openssl/crypto/ecdsa/ecs_lib.c @@ -60,6 +60,9 @@ #endif #include #include +#ifdef OPENSSL_FIPS +#include +#endif const char ECDSA_version[]="ECDSA" OPENSSL_VERSION_PTEXT; @@ -77,7 +80,16 @@ void ECDSA_set_default_method(const ECDSA_METHOD *meth) const ECDSA_METHOD *ECDSA_get_default_method(void) { if(!default_ECDSA_method) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ecdsa_openssl(); + else + return ECDSA_OpenSSL(); +#else default_ECDSA_method = ECDSA_OpenSSL(); +#endif + } return default_ECDSA_method; } @@ -188,12 +200,26 @@ ECDSA_DATA *ecdsa_check(EC_KEY *key) ecdsa_data = (ECDSA_DATA *)ecdsa_data_new(); if (ecdsa_data == NULL) return NULL; - EC_KEY_insert_key_method_data(key, (void *)ecdsa_data, - ecdsa_data_dup, ecdsa_data_free, ecdsa_data_free); + data = EC_KEY_insert_key_method_data(key, (void *)ecdsa_data, + ecdsa_data_dup, ecdsa_data_free, ecdsa_data_free); + if (data != NULL) + { + /* Another thread raced us to install the key_method + * data and won. */ + ecdsa_data_free(ecdsa_data); + ecdsa_data = (ECDSA_DATA *)data; + } } else ecdsa_data = (ECDSA_DATA *)data; - +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(ecdsa_data->flags & ECDSA_FLAG_FIPS_METHOD) + && !(EC_KEY_get_flags(key) & EC_FLAG_NON_FIPS_ALLOW)) + { + ECDSAerr(ECDSA_F_ECDSA_CHECK, ECDSA_R_NON_FIPS_METHOD); + return NULL; + } +#endif return ecdsa_data; } diff --git a/app/openssl/crypto/ecdsa/ecs_locl.h b/app/openssl/crypto/ecdsa/ecs_locl.h index 3a69a840..46f7ad91 100644 --- a/app/openssl/crypto/ecdsa/ecs_locl.h +++ b/app/openssl/crypto/ecdsa/ecs_locl.h @@ -70,8 +70,9 @@ struct ecdsa_method const char *name; ECDSA_SIG *(*ecdsa_do_sign)(const unsigned char *dgst, int dgst_len, const BIGNUM *inv, const BIGNUM *rp, EC_KEY *eckey); - int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx, BIGNUM **kinv, - BIGNUM **r); + int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx, + BIGNUM **kinv, BIGNUM **r, + const unsigned char *dgst, int dlen); int (*ecdsa_do_verify)(const unsigned char *dgst, int dgst_len, const ECDSA_SIG *sig, EC_KEY *eckey); #if 0 @@ -82,6 +83,14 @@ struct ecdsa_method char *app_data; }; +/* If this flag is set the ECDSA method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its responsibility + * to ensure the result is compliant. + */ + +#define ECDSA_FLAG_FIPS_METHOD 0x1 + typedef struct ecdsa_data_st { /* EC_KEY_METH_DATA part */ int (*init)(EC_KEY *); diff --git a/app/openssl/crypto/ecdsa/ecs_ossl.c b/app/openssl/crypto/ecdsa/ecs_ossl.c index 1bbf328d..325aca8c 100644 --- a/app/openssl/crypto/ecdsa/ecs_ossl.c +++ b/app/openssl/crypto/ecdsa/ecs_ossl.c @@ -60,11 +60,13 @@ #include #include #include +#include static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dlen, const BIGNUM *, const BIGNUM *, EC_KEY *eckey); -static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, - BIGNUM **rp); +static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen); static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, const ECDSA_SIG *sig, EC_KEY *eckey); @@ -86,8 +88,9 @@ const ECDSA_METHOD *ECDSA_OpenSSL(void) return &openssl_ecdsa_meth; } -static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, - BIGNUM **rp) +static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen) { BN_CTX *ctx = NULL; BIGNUM *k = NULL, *r = NULL, *order = NULL, *X = NULL; @@ -136,11 +139,28 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, { /* get random k */ do - if (!BN_rand_range(k, order)) +#ifndef OPENSSL_NO_SHA512 + if (EC_KEY_get_nonce_from_hash(eckey)) { - ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, - ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED); - goto err; + if (!BN_generate_dsa_nonce( + k, order, + EC_KEY_get0_private_key(eckey), + dgst, dlen, ctx)) + { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, + ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED); + goto err; + } + } + else +#endif + { + if (!BN_rand_range(k, order)) + { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, + ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED); + goto err; + } } while (BN_is_zero(k)); @@ -167,6 +187,7 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, goto err; } } +#ifndef OPENSSL_NO_EC2M else /* NID_X9_62_characteristic_two_field */ { if (!EC_POINT_get_affine_coordinates_GF2m(group, @@ -176,6 +197,7 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, goto err; } } +#endif if (!BN_nnmod(r, X, order, ctx)) { ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); @@ -280,7 +302,7 @@ static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dgst_len, { if (in_kinv == NULL || in_r == NULL) { - if (!ECDSA_sign_setup(eckey, ctx, &kinv, &ret->r)) + if (!ecdsa->meth->ecdsa_sign_setup(eckey, ctx, &kinv, &ret->r, dgst, dgst_len)) { ECDSAerr(ECDSA_F_ECDSA_DO_SIGN,ERR_R_ECDSA_LIB); goto err; @@ -454,6 +476,7 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, goto err; } } +#ifndef OPENSSL_NO_EC2M else /* NID_X9_62_characteristic_two_field */ { if (!EC_POINT_get_affine_coordinates_GF2m(group, @@ -463,7 +486,7 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len, goto err; } } - +#endif if (!BN_nnmod(u1, X, order, ctx)) { ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB); diff --git a/app/openssl/crypto/ecdsa/ecs_sign.c b/app/openssl/crypto/ecdsa/ecs_sign.c index 353d5af5..ea79a24b 100644 --- a/app/openssl/crypto/ecdsa/ecs_sign.c +++ b/app/openssl/crypto/ecdsa/ecs_sign.c @@ -58,6 +58,7 @@ #include #endif #include +#include ECDSA_SIG *ECDSA_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) { @@ -102,5 +103,12 @@ int ECDSA_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, ECDSA_DATA *ecdsa = ecdsa_check(eckey); if (ecdsa == NULL) return 0; - return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp); + if (EC_KEY_get_nonce_from_hash(eckey)) + { + /* You cannot precompute the ECDSA nonce if it is required to + * depend on the message. */ + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED); + return 0; + } + return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp, NULL, 0); } diff --git a/app/openssl/crypto/engine/eng_all.c b/app/openssl/crypto/engine/eng_all.c index 22c12045..6093376d 100644 --- a/app/openssl/crypto/engine/eng_all.c +++ b/app/openssl/crypto/engine/eng_all.c @@ -61,6 +61,8 @@ void ENGINE_load_builtin_engines(void) { + /* Some ENGINEs need this */ + OPENSSL_cpuid_setup(); #if 0 /* There's no longer any need for an "openssl" ENGINE unless, one day, * it is the *only* way for standard builtin implementations to be be @@ -70,6 +72,12 @@ void ENGINE_load_builtin_engines(void) #endif #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) ENGINE_load_cryptodev(); +#endif +#ifndef OPENSSL_NO_RSAX + ENGINE_load_rsax(); +#endif +#ifndef OPENSSL_NO_RDRAND + ENGINE_load_rdrand(); #endif ENGINE_load_dynamic(); #ifndef OPENSSL_NO_STATIC_ENGINE @@ -112,6 +120,7 @@ void ENGINE_load_builtin_engines(void) ENGINE_load_capi(); #endif #endif + ENGINE_register_all_complete(); } #if defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV) diff --git a/app/openssl/crypto/engine/eng_cryptodev.c b/app/openssl/crypto/engine/eng_cryptodev.c index 52f4ca39..5a715aca 100644 --- a/app/openssl/crypto/engine/eng_cryptodev.c +++ b/app/openssl/crypto/engine/eng_cryptodev.c @@ -79,8 +79,6 @@ struct dev_crypto_state { unsigned char digest_res[HASH_MAX_LEN]; char *mac_data; int mac_len; - - int copy; #endif }; @@ -200,6 +198,7 @@ get_dev_crypto(void) if ((fd = open_dev_crypto()) == -1) return (-1); +#ifndef CRIOGET_NOT_NEEDED if (ioctl(fd, CRIOGET, &retfd) == -1) return (-1); @@ -208,9 +207,19 @@ get_dev_crypto(void) close(retfd); return (-1); } +#else + retfd = fd; +#endif return (retfd); } +static void put_dev_crypto(int fd) +{ +#ifndef CRIOGET_NOT_NEEDED + close(fd); +#endif +} + /* Caching version for asym operations */ static int get_asym_dev_crypto(void) @@ -252,7 +261,7 @@ get_cryptodev_ciphers(const int **cnids) ioctl(fd, CIOCFSESSION, &sess.ses) != -1) nids[count++] = ciphers[i].nid; } - close(fd); + put_dev_crypto(fd); if (count > 0) *cnids = nids; @@ -291,7 +300,7 @@ get_cryptodev_digests(const int **cnids) ioctl(fd, CIOCFSESSION, &sess.ses) != -1) nids[count++] = digests[i].nid; } - close(fd); + put_dev_crypto(fd); if (count > 0) *cnids = nids; @@ -436,7 +445,7 @@ cryptodev_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, sess->cipher = cipher; if (ioctl(state->d_fd, CIOCGSESSION, sess) == -1) { - close(state->d_fd); + put_dev_crypto(state->d_fd); state->d_fd = -1; return (0); } @@ -473,7 +482,7 @@ cryptodev_cleanup(EVP_CIPHER_CTX *ctx) } else { ret = 1; } - close(state->d_fd); + put_dev_crypto(state->d_fd); state->d_fd = -1; return (ret); @@ -686,7 +695,7 @@ static int cryptodev_digest_init(EVP_MD_CTX *ctx) sess->mac = digest; if (ioctl(state->d_fd, CIOCGSESSION, sess) < 0) { - close(state->d_fd); + put_dev_crypto(state->d_fd); state->d_fd = -1; printf("cryptodev_digest_init: Open session failed\n"); return (0); @@ -758,14 +767,12 @@ static int cryptodev_digest_final(EVP_MD_CTX *ctx, unsigned char *md) if (! (ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) ) { /* if application doesn't support one buffer */ memset(&cryp, 0, sizeof(cryp)); - cryp.ses = sess->ses; cryp.flags = 0; cryp.len = state->mac_len; cryp.src = state->mac_data; cryp.dst = NULL; cryp.mac = (caddr_t)md; - if (ioctl(state->d_fd, CIOCCRYPT, &cryp) < 0) { printf("cryptodev_digest_final: digest failed\n"); return (0); @@ -786,6 +793,9 @@ static int cryptodev_digest_cleanup(EVP_MD_CTX *ctx) struct dev_crypto_state *state = ctx->md_data; struct session_op *sess = &state->d_sess; + if (state == NULL) + return 0; + if (state->d_fd < 0) { printf("cryptodev_digest_cleanup: illegal input\n"); return (0); @@ -797,16 +807,13 @@ static int cryptodev_digest_cleanup(EVP_MD_CTX *ctx) state->mac_len = 0; } - if (state->copy) - return 1; - if (ioctl(state->d_fd, CIOCFSESSION, &sess->ses) < 0) { printf("cryptodev_digest_cleanup: failed to close session\n"); ret = 0; } else { ret = 1; } - close(state->d_fd); + put_dev_crypto(state->d_fd); state->d_fd = -1; return (ret); @@ -816,15 +823,39 @@ static int cryptodev_digest_copy(EVP_MD_CTX *to,const EVP_MD_CTX *from) { struct dev_crypto_state *fstate = from->md_data; struct dev_crypto_state *dstate = to->md_data; + struct session_op *sess; + int digest; - memcpy(dstate, fstate, sizeof(struct dev_crypto_state)); + if (dstate == NULL || fstate == NULL) + return 1; - if (fstate->mac_len != 0) { - dstate->mac_data = OPENSSL_malloc(fstate->mac_len); - memcpy(dstate->mac_data, fstate->mac_data, fstate->mac_len); + memcpy(dstate, fstate, sizeof(struct dev_crypto_state)); + + sess = &dstate->d_sess; + + digest = digest_nid_to_cryptodev(to->digest->type); + + sess->mackey = dstate->dummy_mac_key; + sess->mackeylen = digest_key_length(to->digest->type); + sess->mac = digest; + + dstate->d_fd = get_dev_crypto(); + + if (ioctl(dstate->d_fd, CIOCGSESSION, sess) < 0) { + put_dev_crypto(dstate->d_fd); + dstate->d_fd = -1; + printf("cryptodev_digest_init: Open session failed\n"); + return (0); } - dstate->copy = 1; + if (fstate->mac_len != 0) { + if (fstate->mac_data != NULL) + { + dstate->mac_data = OPENSSL_malloc(fstate->mac_len); + memcpy(dstate->mac_data, fstate->mac_data, fstate->mac_len); + dstate->mac_len = fstate->mac_len; + } + } return 1; } @@ -1347,11 +1378,11 @@ ENGINE_load_cryptodev(void) * find out what asymmetric crypto algorithms we support */ if (ioctl(fd, CIOCASYMFEAT, &cryptodev_asymfeat) == -1) { - close(fd); + put_dev_crypto(fd); ENGINE_free(engine); return; } - close(fd); + put_dev_crypto(fd); if (!ENGINE_set_id(engine, "cryptodev") || !ENGINE_set_name(engine, "BSD cryptodev engine") || diff --git a/app/openssl/crypto/engine/eng_dyn.c b/app/openssl/crypto/engine/eng_dyn.c index 807da7a5..8fb8634e 100644 --- a/app/openssl/crypto/engine/eng_dyn.c +++ b/app/openssl/crypto/engine/eng_dyn.c @@ -408,7 +408,7 @@ static int int_load(dynamic_data_ctx *ctx) int num, loop; /* Unless told not to, try a direct load */ if((ctx->dir_load != 2) && (DSO_load(ctx->dynamic_dso, - ctx->DYNAMIC_LIBNAME, NULL, 0)) != NULL) + ctx->DYNAMIC_LIBNAME, NULL, 0) != NULL)) return 1; /* If we're not allowed to use 'dirs' or we have none, fail */ if(!ctx->dir_load || (num = sk_OPENSSL_STRING_num(ctx->dirs)) < 1) @@ -423,6 +423,9 @@ static int int_load(dynamic_data_ctx *ctx) { /* Found what we're looking for */ OPENSSL_free(merge); + /* Previous failed loop iterations, if any, will have resulted in + * errors. Clear them out before returning success. */ + ERR_clear_error(); return 1; } OPENSSL_free(merge); diff --git a/app/openssl/crypto/engine/eng_fat.c b/app/openssl/crypto/engine/eng_fat.c index db66e623..789b8d57 100644 --- a/app/openssl/crypto/engine/eng_fat.c +++ b/app/openssl/crypto/engine/eng_fat.c @@ -176,6 +176,7 @@ int ENGINE_register_all_complete(void) ENGINE *e; for(e=ENGINE_get_first() ; e ; e=ENGINE_get_next(e)) - ENGINE_register_complete(e); + if (!(e->flags & ENGINE_FLAGS_NO_REGISTER_ALL)) + ENGINE_register_complete(e); return 1; } diff --git a/app/openssl/crypto/engine/eng_list.c b/app/openssl/crypto/engine/eng_list.c index 27846edb..95c85896 100644 --- a/app/openssl/crypto/engine/eng_list.c +++ b/app/openssl/crypto/engine/eng_list.c @@ -408,6 +408,7 @@ ENGINE *ENGINE_by_id(const char *id) !ENGINE_ctrl_cmd_string(iterator, "DIR_LOAD", "2", 0) || !ENGINE_ctrl_cmd_string(iterator, "DIR_ADD", load_dir, 0) || + !ENGINE_ctrl_cmd_string(iterator, "LIST_ADD", "1", 0) || !ENGINE_ctrl_cmd_string(iterator, "LOAD", NULL, 0)) goto notfound; return iterator; diff --git a/app/openssl/crypto/engine/engine.h b/app/openssl/crypto/engine/engine.h index 943aeae2..f8be4977 100644 --- a/app/openssl/crypto/engine/engine.h +++ b/app/openssl/crypto/engine/engine.h @@ -141,6 +141,13 @@ extern "C" { * the existing ENGINE's structural reference count. */ #define ENGINE_FLAGS_BY_ID_COPY (int)0x0004 +/* This flag if for an ENGINE that does not want its methods registered as + * part of ENGINE_register_all_complete() for example if the methods are + * not usable as default methods. + */ + +#define ENGINE_FLAGS_NO_REGISTER_ALL (int)0x0008 + /* ENGINEs can support their own command types, and these flags are used in * ENGINE_CTRL_GET_CMD_FLAGS to indicate to the caller what kind of input each * command expects. Currently only numeric and string input is supported. If a @@ -344,6 +351,8 @@ void ENGINE_load_gost(void); #endif #endif void ENGINE_load_cryptodev(void); +void ENGINE_load_rsax(void); +void ENGINE_load_rdrand(void); void ENGINE_load_builtin_engines(void); /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation diff --git a/app/openssl/crypto/engine/tb_asnmth.c b/app/openssl/crypto/engine/tb_asnmth.c new file mode 100644 index 00000000..75090339 --- /dev/null +++ b/app/openssl/crypto/engine/tb_asnmth.c @@ -0,0 +1,246 @@ +/* ==================================================================== + * Copyright (c) 2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include "eng_int.h" +#include "asn1_locl.h" +#include + +/* If this symbol is defined then ENGINE_get_pkey_asn1_meth_engine(), the + * function that is used by EVP to hook in pkey_asn1_meth code and cache + * defaults (etc), will display brief debugging summaries to stderr with the + * 'nid'. */ +/* #define ENGINE_PKEY_ASN1_METH_DEBUG */ + +static ENGINE_TABLE *pkey_asn1_meth_table = NULL; + +void ENGINE_unregister_pkey_asn1_meths(ENGINE *e) + { + engine_table_unregister(&pkey_asn1_meth_table, e); + } + +static void engine_unregister_all_pkey_asn1_meths(void) + { + engine_table_cleanup(&pkey_asn1_meth_table); + } + +int ENGINE_register_pkey_asn1_meths(ENGINE *e) + { + if(e->pkey_asn1_meths) + { + const int *nids; + int num_nids = e->pkey_asn1_meths(e, NULL, &nids, 0); + if(num_nids > 0) + return engine_table_register(&pkey_asn1_meth_table, + engine_unregister_all_pkey_asn1_meths, e, nids, + num_nids, 0); + } + return 1; + } + +void ENGINE_register_all_pkey_asn1_meths(void) + { + ENGINE *e; + + for(e=ENGINE_get_first() ; e ; e=ENGINE_get_next(e)) + ENGINE_register_pkey_asn1_meths(e); + } + +int ENGINE_set_default_pkey_asn1_meths(ENGINE *e) + { + if(e->pkey_asn1_meths) + { + const int *nids; + int num_nids = e->pkey_asn1_meths(e, NULL, &nids, 0); + if(num_nids > 0) + return engine_table_register(&pkey_asn1_meth_table, + engine_unregister_all_pkey_asn1_meths, e, nids, + num_nids, 1); + } + return 1; + } + +/* Exposed API function to get a functional reference from the implementation + * table (ie. try to get a functional reference from the tabled structural + * references) for a given pkey_asn1_meth 'nid' */ +ENGINE *ENGINE_get_pkey_asn1_meth_engine(int nid) + { + return engine_table_select(&pkey_asn1_meth_table, nid); + } + +/* Obtains a pkey_asn1_meth implementation from an ENGINE functional reference */ +const EVP_PKEY_ASN1_METHOD *ENGINE_get_pkey_asn1_meth(ENGINE *e, int nid) + { + EVP_PKEY_ASN1_METHOD *ret; + ENGINE_PKEY_ASN1_METHS_PTR fn = ENGINE_get_pkey_asn1_meths(e); + if(!fn || !fn(e, &ret, NULL, nid)) + { + ENGINEerr(ENGINE_F_ENGINE_GET_PKEY_ASN1_METH, + ENGINE_R_UNIMPLEMENTED_PUBLIC_KEY_METHOD); + return NULL; + } + return ret; + } + +/* Gets the pkey_asn1_meth callback from an ENGINE structure */ +ENGINE_PKEY_ASN1_METHS_PTR ENGINE_get_pkey_asn1_meths(const ENGINE *e) + { + return e->pkey_asn1_meths; + } + +/* Sets the pkey_asn1_meth callback in an ENGINE structure */ +int ENGINE_set_pkey_asn1_meths(ENGINE *e, ENGINE_PKEY_ASN1_METHS_PTR f) + { + e->pkey_asn1_meths = f; + return 1; + } + +/* Internal function to free up EVP_PKEY_ASN1_METHOD structures before an + * ENGINE is destroyed + */ + +void engine_pkey_asn1_meths_free(ENGINE *e) + { + int i; + EVP_PKEY_ASN1_METHOD *pkm; + if (e->pkey_asn1_meths) + { + const int *pknids; + int npknids; + npknids = e->pkey_asn1_meths(e, NULL, &pknids, 0); + for (i = 0; i < npknids; i++) + { + if (e->pkey_asn1_meths(e, &pkm, NULL, pknids[i])) + { + EVP_PKEY_asn1_free(pkm); + } + } + } + } + +/* Find a method based on a string. This does a linear search through + * all implemented algorithms. This is OK in practice because only + * a small number of algorithms are likely to be implemented in an engine + * and it is not used for speed critical operations. + */ + +const EVP_PKEY_ASN1_METHOD *ENGINE_get_pkey_asn1_meth_str(ENGINE *e, + const char *str, int len) + { + int i, nidcount; + const int *nids; + EVP_PKEY_ASN1_METHOD *ameth; + if (!e->pkey_asn1_meths) + return NULL; + if (len == -1) + len = strlen(str); + nidcount = e->pkey_asn1_meths(e, NULL, &nids, 0); + for (i = 0; i < nidcount; i++) + { + e->pkey_asn1_meths(e, &ameth, NULL, nids[i]); + if (((int)strlen(ameth->pem_str) == len) && + !strncasecmp(ameth->pem_str, str, len)) + return ameth; + } + return NULL; + } + +typedef struct + { + ENGINE *e; + const EVP_PKEY_ASN1_METHOD *ameth; + const char *str; + int len; + } ENGINE_FIND_STR; + +static void look_str_cb(int nid, STACK_OF(ENGINE) *sk, ENGINE *def, void *arg) + { + ENGINE_FIND_STR *lk = arg; + int i; + if (lk->ameth) + return; + for (i = 0; i < sk_ENGINE_num(sk); i++) + { + ENGINE *e = sk_ENGINE_value(sk, i); + EVP_PKEY_ASN1_METHOD *ameth; + e->pkey_asn1_meths(e, &ameth, NULL, nid); + if (((int)strlen(ameth->pem_str) == lk->len) && + !strncasecmp(ameth->pem_str, lk->str, lk->len)) + { + lk->e = e; + lk->ameth = ameth; + return; + } + } + } + +const EVP_PKEY_ASN1_METHOD *ENGINE_pkey_asn1_find_str(ENGINE **pe, + const char *str, int len) + { + ENGINE_FIND_STR fstr; + fstr.e = NULL; + fstr.ameth = NULL; + fstr.str = str; + fstr.len = len; + CRYPTO_w_lock(CRYPTO_LOCK_ENGINE); + engine_table_doall(pkey_asn1_meth_table, look_str_cb, &fstr); + /* If found obtain a structural reference to engine */ + if (fstr.e) + { + fstr.e->struct_ref++; + engine_ref_debug(fstr.e, 0, 1) + } + *pe = fstr.e; + CRYPTO_w_unlock(CRYPTO_LOCK_ENGINE); + return fstr.ameth; + } diff --git a/app/openssl/crypto/engine/tb_pkmeth.c b/app/openssl/crypto/engine/tb_pkmeth.c new file mode 100644 index 00000000..1cdb967f --- /dev/null +++ b/app/openssl/crypto/engine/tb_pkmeth.c @@ -0,0 +1,167 @@ +/* ==================================================================== + * Copyright (c) 2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include "eng_int.h" +#include + +/* If this symbol is defined then ENGINE_get_pkey_meth_engine(), the function + * that is used by EVP to hook in pkey_meth code and cache defaults (etc), will + * display brief debugging summaries to stderr with the 'nid'. */ +/* #define ENGINE_PKEY_METH_DEBUG */ + +static ENGINE_TABLE *pkey_meth_table = NULL; + +void ENGINE_unregister_pkey_meths(ENGINE *e) + { + engine_table_unregister(&pkey_meth_table, e); + } + +static void engine_unregister_all_pkey_meths(void) + { + engine_table_cleanup(&pkey_meth_table); + } + +int ENGINE_register_pkey_meths(ENGINE *e) + { + if(e->pkey_meths) + { + const int *nids; + int num_nids = e->pkey_meths(e, NULL, &nids, 0); + if(num_nids > 0) + return engine_table_register(&pkey_meth_table, + engine_unregister_all_pkey_meths, e, nids, + num_nids, 0); + } + return 1; + } + +void ENGINE_register_all_pkey_meths() + { + ENGINE *e; + + for(e=ENGINE_get_first() ; e ; e=ENGINE_get_next(e)) + ENGINE_register_pkey_meths(e); + } + +int ENGINE_set_default_pkey_meths(ENGINE *e) + { + if(e->pkey_meths) + { + const int *nids; + int num_nids = e->pkey_meths(e, NULL, &nids, 0); + if(num_nids > 0) + return engine_table_register(&pkey_meth_table, + engine_unregister_all_pkey_meths, e, nids, + num_nids, 1); + } + return 1; + } + +/* Exposed API function to get a functional reference from the implementation + * table (ie. try to get a functional reference from the tabled structural + * references) for a given pkey_meth 'nid' */ +ENGINE *ENGINE_get_pkey_meth_engine(int nid) + { + return engine_table_select(&pkey_meth_table, nid); + } + +/* Obtains a pkey_meth implementation from an ENGINE functional reference */ +const EVP_PKEY_METHOD *ENGINE_get_pkey_meth(ENGINE *e, int nid) + { + EVP_PKEY_METHOD *ret; + ENGINE_PKEY_METHS_PTR fn = ENGINE_get_pkey_meths(e); + if(!fn || !fn(e, &ret, NULL, nid)) + { + ENGINEerr(ENGINE_F_ENGINE_GET_PKEY_METH, + ENGINE_R_UNIMPLEMENTED_PUBLIC_KEY_METHOD); + return NULL; + } + return ret; + } + +/* Gets the pkey_meth callback from an ENGINE structure */ +ENGINE_PKEY_METHS_PTR ENGINE_get_pkey_meths(const ENGINE *e) + { + return e->pkey_meths; + } + +/* Sets the pkey_meth callback in an ENGINE structure */ +int ENGINE_set_pkey_meths(ENGINE *e, ENGINE_PKEY_METHS_PTR f) + { + e->pkey_meths = f; + return 1; + } + +/* Internal function to free up EVP_PKEY_METHOD structures before an + * ENGINE is destroyed + */ + +void engine_pkey_meths_free(ENGINE *e) + { + int i; + EVP_PKEY_METHOD *pkm; + if (e->pkey_meths) + { + const int *pknids; + int npknids; + npknids = e->pkey_meths(e, NULL, &pknids, 0); + for (i = 0; i < npknids; i++) + { + if (e->pkey_meths(e, &pkm, NULL, pknids[i])) + { + EVP_PKEY_meth_free(pkm); + } + } + } + } diff --git a/app/openssl/crypto/err/err.c b/app/openssl/crypto/err/err.c index 69713a6e..fcdb2440 100644 --- a/app/openssl/crypto/err/err.c +++ b/app/openssl/crypto/err/err.c @@ -1066,6 +1066,13 @@ void ERR_set_error_data(char *data, int flags) void ERR_add_error_data(int num, ...) { va_list args; + va_start(args, num); + ERR_add_error_vdata(num, args); + va_end(args); + } + +void ERR_add_error_vdata(int num, va_list args) + { int i,n,s; char *str,*p,*a; @@ -1074,7 +1081,6 @@ void ERR_add_error_data(int num, ...) if (str == NULL) return; str[0]='\0'; - va_start(args, num); n=0; for (i=0; i #include +#ifndef OPENSSL_NO_COMP #include +#endif #ifndef OPENSSL_NO_RSA #include #endif @@ -95,6 +97,9 @@ #include #include #include +#ifdef OPENSSL_FIPS +#include +#endif #include #ifndef OPENSSL_NO_CMS #include @@ -102,7 +107,6 @@ #ifndef OPENSSL_NO_JPAKE #include #endif -#include void ERR_load_crypto_strings(void) { @@ -126,7 +130,9 @@ void ERR_load_crypto_strings(void) ERR_load_ASN1_strings(); ERR_load_CONF_strings(); ERR_load_CRYPTO_strings(); +#ifndef OPENSSL_NO_COMP ERR_load_COMP_strings(); +#endif #ifndef OPENSSL_NO_EC ERR_load_EC_strings(); #endif @@ -149,12 +155,14 @@ void ERR_load_crypto_strings(void) #endif ERR_load_OCSP_strings(); ERR_load_UI_strings(); +#ifdef OPENSSL_FIPS + ERR_load_FIPS_strings(); +#endif #ifndef OPENSSL_NO_CMS ERR_load_CMS_strings(); #endif #ifndef OPENSSL_NO_JPAKE ERR_load_JPAKE_strings(); #endif - ERR_load_COMP_strings(); #endif } diff --git a/app/openssl/crypto/evp/bio_b64.c b/app/openssl/crypto/evp/bio_b64.c index 72a2a672..ac6d441a 100644 --- a/app/openssl/crypto/evp/bio_b64.c +++ b/app/openssl/crypto/evp/bio_b64.c @@ -264,7 +264,7 @@ static int b64_read(BIO *b, char *out, int outl) } /* we fell off the end without starting */ - if (j == i) + if ((j == i) && (num == 0)) { /* Is this is one long chunk?, if so, keep on * reading until a new line. */ diff --git a/app/openssl/crypto/evp/bio_md.c b/app/openssl/crypto/evp/bio_md.c index 9841e32e..144fdfd5 100644 --- a/app/openssl/crypto/evp/bio_md.c +++ b/app/openssl/crypto/evp/bio_md.c @@ -153,8 +153,12 @@ static int md_write(BIO *b, const char *in, int inl) { if (ret > 0) { - EVP_DigestUpdate(ctx,(const unsigned char *)in, - (unsigned int)ret); + if (!EVP_DigestUpdate(ctx,(const unsigned char *)in, + (unsigned int)ret)) + { + BIO_clear_retry_flags(b); + return 0; + } } } if(b->next_bio != NULL) @@ -220,7 +224,8 @@ static long md_ctrl(BIO *b, int cmd, long num, void *ptr) case BIO_CTRL_DUP: dbio=ptr; dctx=dbio->ptr; - EVP_MD_CTX_copy_ex(dctx,ctx); + if (!EVP_MD_CTX_copy_ex(dctx,ctx)) + return 0; b->init=1; break; default: diff --git a/app/openssl/crypto/evp/bio_ok.c b/app/openssl/crypto/evp/bio_ok.c index 98bc1ab4..e6433535 100644 --- a/app/openssl/crypto/evp/bio_ok.c +++ b/app/openssl/crypto/evp/bio_ok.c @@ -133,10 +133,10 @@ static int ok_new(BIO *h); static int ok_free(BIO *data); static long ok_callback_ctrl(BIO *h, int cmd, bio_info_cb *fp); -static void sig_out(BIO* b); -static void sig_in(BIO* b); -static void block_out(BIO* b); -static void block_in(BIO* b); +static int sig_out(BIO* b); +static int sig_in(BIO* b); +static int block_out(BIO* b); +static int block_in(BIO* b); #define OK_BLOCK_SIZE (1024*4) #define OK_BLOCK_BLOCK 4 #define IOBS (OK_BLOCK_SIZE+ OK_BLOCK_BLOCK+ 3*EVP_MAX_MD_SIZE) @@ -266,10 +266,24 @@ static int ok_read(BIO *b, char *out, int outl) ctx->buf_len+= i; /* no signature yet -- check if we got one */ - if (ctx->sigio == 1) sig_in(b); + if (ctx->sigio == 1) + { + if (!sig_in(b)) + { + BIO_clear_retry_flags(b); + return 0; + } + } /* signature ok -- check if we got block */ - if (ctx->sigio == 0) block_in(b); + if (ctx->sigio == 0) + { + if (!block_in(b)) + { + BIO_clear_retry_flags(b); + return 0; + } + } /* invalid block -- cancel */ if (ctx->cont <= 0) break; @@ -293,7 +307,8 @@ static int ok_write(BIO *b, const char *in, int inl) if ((ctx == NULL) || (b->next_bio == NULL) || (b->init == 0)) return(0); - if(ctx->sigio) sig_out(b); + if(ctx->sigio && !sig_out(b)) + return 0; do{ BIO_clear_retry_flags(b); @@ -332,7 +347,11 @@ static int ok_write(BIO *b, const char *in, int inl) if(ctx->buf_len >= OK_BLOCK_SIZE+ OK_BLOCK_BLOCK) { - block_out(b); + if (!block_out(b)) + { + BIO_clear_retry_flags(b); + return 0; + } } }while(inl > 0); @@ -379,7 +398,8 @@ static long ok_ctrl(BIO *b, int cmd, long num, void *ptr) case BIO_CTRL_FLUSH: /* do a final write */ if(ctx->blockout == 0) - block_out(b); + if (!block_out(b)) + return 0; while (ctx->blockout) { @@ -408,7 +428,8 @@ static long ok_ctrl(BIO *b, int cmd, long num, void *ptr) break; case BIO_C_SET_MD: md=ptr; - EVP_DigestInit_ex(&ctx->md, md, NULL); + if (!EVP_DigestInit_ex(&ctx->md, md, NULL)) + return 0; b->init=1; break; case BIO_C_GET_MD: @@ -455,7 +476,7 @@ static void longswap(void *_ptr, size_t len) } } -static void sig_out(BIO* b) +static int sig_out(BIO* b) { BIO_OK_CTX *ctx; EVP_MD_CTX *md; @@ -463,9 +484,10 @@ static void sig_out(BIO* b) ctx=b->ptr; md=&ctx->md; - if(ctx->buf_len+ 2* md->digest->md_size > OK_BLOCK_SIZE) return; + if(ctx->buf_len+ 2* md->digest->md_size > OK_BLOCK_SIZE) return 1; - EVP_DigestInit_ex(md, md->digest, NULL); + if (!EVP_DigestInit_ex(md, md->digest, NULL)) + goto berr; /* FIXME: there's absolutely no guarantee this makes any sense at all, * particularly now EVP_MD_CTX has been restructured. */ @@ -474,14 +496,20 @@ static void sig_out(BIO* b) longswap(&(ctx->buf[ctx->buf_len]), md->digest->md_size); ctx->buf_len+= md->digest->md_size; - EVP_DigestUpdate(md, WELLKNOWN, strlen(WELLKNOWN)); - EVP_DigestFinal_ex(md, &(ctx->buf[ctx->buf_len]), NULL); + if (!EVP_DigestUpdate(md, WELLKNOWN, strlen(WELLKNOWN))) + goto berr; + if (!EVP_DigestFinal_ex(md, &(ctx->buf[ctx->buf_len]), NULL)) + goto berr; ctx->buf_len+= md->digest->md_size; ctx->blockout= 1; ctx->sigio= 0; + return 1; + berr: + BIO_clear_retry_flags(b); + return 0; } -static void sig_in(BIO* b) +static int sig_in(BIO* b) { BIO_OK_CTX *ctx; EVP_MD_CTX *md; @@ -491,15 +519,18 @@ static void sig_in(BIO* b) ctx=b->ptr; md=&ctx->md; - if((int)(ctx->buf_len-ctx->buf_off) < 2*md->digest->md_size) return; + if((int)(ctx->buf_len-ctx->buf_off) < 2*md->digest->md_size) return 1; - EVP_DigestInit_ex(md, md->digest, NULL); + if (!EVP_DigestInit_ex(md, md->digest, NULL)) + goto berr; memcpy(md->md_data, &(ctx->buf[ctx->buf_off]), md->digest->md_size); longswap(md->md_data, md->digest->md_size); ctx->buf_off+= md->digest->md_size; - EVP_DigestUpdate(md, WELLKNOWN, strlen(WELLKNOWN)); - EVP_DigestFinal_ex(md, tmp, NULL); + if (!EVP_DigestUpdate(md, WELLKNOWN, strlen(WELLKNOWN))) + goto berr; + if (!EVP_DigestFinal_ex(md, tmp, NULL)) + goto berr; ret= memcmp(&(ctx->buf[ctx->buf_off]), tmp, md->digest->md_size) == 0; ctx->buf_off+= md->digest->md_size; if(ret == 1) @@ -516,9 +547,13 @@ static void sig_in(BIO* b) { ctx->cont= 0; } + return 1; + berr: + BIO_clear_retry_flags(b); + return 0; } -static void block_out(BIO* b) +static int block_out(BIO* b) { BIO_OK_CTX *ctx; EVP_MD_CTX *md; @@ -532,13 +567,20 @@ static void block_out(BIO* b) ctx->buf[1]=(unsigned char)(tl>>16); ctx->buf[2]=(unsigned char)(tl>>8); ctx->buf[3]=(unsigned char)(tl); - EVP_DigestUpdate(md, (unsigned char*) &(ctx->buf[OK_BLOCK_BLOCK]), tl); - EVP_DigestFinal_ex(md, &(ctx->buf[ctx->buf_len]), NULL); + if (!EVP_DigestUpdate(md, + (unsigned char*) &(ctx->buf[OK_BLOCK_BLOCK]), tl)) + goto berr; + if (!EVP_DigestFinal_ex(md, &(ctx->buf[ctx->buf_len]), NULL)) + goto berr; ctx->buf_len+= md->digest->md_size; ctx->blockout= 1; + return 1; + berr: + BIO_clear_retry_flags(b); + return 0; } -static void block_in(BIO* b) +static int block_in(BIO* b) { BIO_OK_CTX *ctx; EVP_MD_CTX *md; @@ -554,10 +596,13 @@ static void block_in(BIO* b) tl|=ctx->buf[2]; tl<<=8; tl|=ctx->buf[3]; - if (ctx->buf_len < tl+ OK_BLOCK_BLOCK+ md->digest->md_size) return; + if (ctx->buf_len < tl+ OK_BLOCK_BLOCK+ md->digest->md_size) return 1; - EVP_DigestUpdate(md, (unsigned char*) &(ctx->buf[OK_BLOCK_BLOCK]), tl); - EVP_DigestFinal_ex(md, tmp, NULL); + if (!EVP_DigestUpdate(md, + (unsigned char*) &(ctx->buf[OK_BLOCK_BLOCK]), tl)) + goto berr; + if (!EVP_DigestFinal_ex(md, tmp, NULL)) + goto berr; if(memcmp(&(ctx->buf[tl+ OK_BLOCK_BLOCK]), tmp, md->digest->md_size) == 0) { /* there might be parts from next block lurking around ! */ @@ -571,5 +616,9 @@ static void block_in(BIO* b) { ctx->cont= 0; } + return 1; + berr: + BIO_clear_retry_flags(b); + return 0; } diff --git a/app/openssl/crypto/evp/c_allc.c b/app/openssl/crypto/evp/c_allc.c index c5f92683..2a45d435 100644 --- a/app/openssl/crypto/evp/c_allc.c +++ b/app/openssl/crypto/evp/c_allc.c @@ -98,6 +98,9 @@ void OpenSSL_add_all_ciphers(void) #ifndef OPENSSL_NO_RC4 EVP_add_cipher(EVP_rc4()); EVP_add_cipher(EVP_rc4_40()); +#ifndef OPENSSL_NO_MD5 + EVP_add_cipher(EVP_rc4_hmac_md5()); +#endif #endif #ifndef OPENSSL_NO_IDEA @@ -166,9 +169,9 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_128_cfb1()); EVP_add_cipher(EVP_aes_128_cfb8()); EVP_add_cipher(EVP_aes_128_ofb()); -#if 0 EVP_add_cipher(EVP_aes_128_ctr()); -#endif + EVP_add_cipher(EVP_aes_128_gcm()); + EVP_add_cipher(EVP_aes_128_xts()); EVP_add_cipher_alias(SN_aes_128_cbc,"AES128"); EVP_add_cipher_alias(SN_aes_128_cbc,"aes128"); EVP_add_cipher(EVP_aes_192_ecb()); @@ -177,9 +180,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_192_cfb1()); EVP_add_cipher(EVP_aes_192_cfb8()); EVP_add_cipher(EVP_aes_192_ofb()); -#if 0 EVP_add_cipher(EVP_aes_192_ctr()); -#endif + EVP_add_cipher(EVP_aes_192_gcm()); EVP_add_cipher_alias(SN_aes_192_cbc,"AES192"); EVP_add_cipher_alias(SN_aes_192_cbc,"aes192"); EVP_add_cipher(EVP_aes_256_ecb()); @@ -188,11 +190,15 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_256_cfb1()); EVP_add_cipher(EVP_aes_256_cfb8()); EVP_add_cipher(EVP_aes_256_ofb()); -#if 0 EVP_add_cipher(EVP_aes_256_ctr()); -#endif + EVP_add_cipher(EVP_aes_256_gcm()); + EVP_add_cipher(EVP_aes_256_xts()); EVP_add_cipher_alias(SN_aes_256_cbc,"AES256"); EVP_add_cipher_alias(SN_aes_256_cbc,"aes256"); +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) + EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1()); + EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1()); +#endif #endif #ifndef OPENSSL_NO_CAMELLIA diff --git a/app/openssl/crypto/evp/digest.c b/app/openssl/crypto/evp/digest.c index 982ba2b1..d14e8e48 100644 --- a/app/openssl/crypto/evp/digest.c +++ b/app/openssl/crypto/evp/digest.c @@ -117,6 +117,10 @@ #include #endif +#ifdef OPENSSL_FIPS +#include +#endif + void EVP_MD_CTX_init(EVP_MD_CTX *ctx) { memset(ctx,'\0',sizeof *ctx); @@ -225,12 +229,26 @@ skip_to_init: } if (ctx->flags & EVP_MD_CTX_FLAG_NO_INIT) return 1; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + { + if (FIPS_digestinit(ctx, type)) + return 1; + OPENSSL_free(ctx->md_data); + ctx->md_data = NULL; + return 0; + } +#endif return ctx->digest->init(ctx); } int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *data, size_t count) { +#ifdef OPENSSL_FIPS + return FIPS_digestupdate(ctx, data, count); +#else return ctx->update(ctx,data,count); +#endif } /* The caller can assume that this removes any secret data from the context */ @@ -245,6 +263,9 @@ int EVP_DigestFinal(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) /* The caller can assume that this removes any secret data from the context */ int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) { +#ifdef OPENSSL_FIPS + return FIPS_digestfinal(ctx, md, size); +#else int ret; OPENSSL_assert(ctx->digest->md_size <= EVP_MAX_MD_SIZE); @@ -258,6 +279,7 @@ int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) } memset(ctx->md_data,0,ctx->digest->ctx_size); return ret; +#endif } int EVP_MD_CTX_copy(EVP_MD_CTX *out, const EVP_MD_CTX *in) @@ -344,13 +366,17 @@ int EVP_Digest(const void *data, size_t count, void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) { - EVP_MD_CTX_cleanup(ctx); - OPENSSL_free(ctx); + if (ctx) + { + EVP_MD_CTX_cleanup(ctx); + OPENSSL_free(ctx); + } } /* This call frees resources associated with the context */ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) { +#ifndef OPENSSL_FIPS /* Don't assume ctx->md_data was cleaned in EVP_Digest_Final, * because sometimes only copies of the context are ever finalised. */ @@ -363,6 +389,7 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) OPENSSL_cleanse(ctx->md_data,ctx->digest->ctx_size); OPENSSL_free(ctx->md_data); } +#endif if (ctx->pctx) EVP_PKEY_CTX_free(ctx->pctx); #ifndef OPENSSL_NO_ENGINE @@ -370,6 +397,9 @@ int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) /* The EVP_MD we used belongs to an ENGINE, release the * functional reference we held for this reason. */ ENGINE_finish(ctx->engine); +#endif +#ifdef OPENSSL_FIPS + FIPS_md_ctx_cleanup(ctx); #endif memset(ctx,'\0',sizeof *ctx); diff --git a/app/openssl/crypto/evp/e_aes.c b/app/openssl/crypto/evp/e_aes.c index bd6c0a3a..41cee42d 100644 --- a/app/openssl/crypto/evp/e_aes.c +++ b/app/openssl/crypto/evp/e_aes.c @@ -1,5 +1,5 @@ /* ==================================================================== - * Copyright (c) 2001 The OpenSSL Project. All rights reserved. + * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -56,57 +56,519 @@ #include #include #include "evp_locl.h" - -static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, - const unsigned char *iv, int enc); +#ifndef OPENSSL_FIPS +#include "modes_lcl.h" +#include typedef struct { AES_KEY ks; + block128_f block; + union { + cbc128_f cbc; + ctr128_f ctr; + } stream; } EVP_AES_KEY; -#define data(ctx) EVP_C_DATA(EVP_AES_KEY,ctx) - -IMPLEMENT_BLOCK_CIPHER(aes_128, ks, AES, EVP_AES_KEY, - NID_aes_128, 16, 16, 16, 128, - 0, aes_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, - NULL) -IMPLEMENT_BLOCK_CIPHER(aes_192, ks, AES, EVP_AES_KEY, - NID_aes_192, 16, 24, 16, 128, - 0, aes_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, - NULL) -IMPLEMENT_BLOCK_CIPHER(aes_256, ks, AES, EVP_AES_KEY, - NID_aes_256, 16, 32, 16, 128, - 0, aes_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, - NULL) - -#define IMPLEMENT_AES_CFBR(ksize,cbits) IMPLEMENT_CFBR(aes,AES,EVP_AES_KEY,ks,ksize,cbits,16) - -IMPLEMENT_AES_CFBR(128,1) -IMPLEMENT_AES_CFBR(192,1) -IMPLEMENT_AES_CFBR(256,1) - -IMPLEMENT_AES_CFBR(128,8) -IMPLEMENT_AES_CFBR(192,8) -IMPLEMENT_AES_CFBR(256,8) +typedef struct + { + AES_KEY ks; /* AES key schedule to use */ + int key_set; /* Set if key initialised */ + int iv_set; /* Set if an iv is set */ + GCM128_CONTEXT gcm; + unsigned char *iv; /* Temporary IV store */ + int ivlen; /* IV length */ + int taglen; + int iv_gen; /* It is OK to generate IVs */ + int tls_aad_len; /* TLS AAD length */ + ctr128_f ctr; + } EVP_AES_GCM_CTX; + +typedef struct + { + AES_KEY ks1, ks2; /* AES key schedules to use */ + XTS128_CONTEXT xts; + void (*stream)(const unsigned char *in, + unsigned char *out, size_t length, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); + } EVP_AES_XTS_CTX; + +typedef struct + { + AES_KEY ks; /* AES key schedule to use */ + int key_set; /* Set if key initialised */ + int iv_set; /* Set if an iv is set */ + int tag_set; /* Set if tag is valid */ + int len_set; /* Set if message length set */ + int L, M; /* L and M parameters from RFC3610 */ + CCM128_CONTEXT ccm; + ccm128_f str; + } EVP_AES_CCM_CTX; + +#define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) + +#ifdef VPAES_ASM +int vpaes_set_encrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); +int vpaes_set_decrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); + +void vpaes_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void vpaes_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); + +void vpaes_cbc_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, + unsigned char *ivec, int enc); +#endif +#ifdef BSAES_ASM +void bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char ivec[16], int enc); +void bsaes_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + const unsigned char ivec[16]); +void bsaes_xts_encrypt(const unsigned char *inp, unsigned char *out, + size_t len, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char iv[16]); +void bsaes_xts_decrypt(const unsigned char *inp, unsigned char *out, + size_t len, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char iv[16]); +#endif +#ifdef AES_CTR_ASM +void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + const unsigned char ivec[AES_BLOCK_SIZE]); +#endif +#ifdef AES_XTS_ASM +void AES_xts_encrypt(const char *inp,char *out,size_t len, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); +void AES_xts_decrypt(const char *inp,char *out,size_t len, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); +#endif + +#if defined(AES_ASM) && !defined(I386_ONLY) && ( \ + ((defined(__i386) || defined(__i386__) || \ + defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) + +extern unsigned int OPENSSL_ia32cap_P[2]; + +#ifdef VPAES_ASM +#define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +#endif +#ifdef BSAES_ASM +#define BSAES_CAPABLE VPAES_CAPABLE +#endif +/* + * AES-NI section + */ +#define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) + +int aesni_set_encrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); +int aesni_set_decrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); + +void aesni_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void aesni_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); + +void aesni_ecb_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, + int enc); +void aesni_cbc_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, + unsigned char *ivec, int enc); + +void aesni_ctr32_encrypt_blocks(const unsigned char *in, + unsigned char *out, + size_t blocks, + const void *key, + const unsigned char *ivec); + +void aesni_xts_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); + +void aesni_xts_decrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); + +void aesni_ccm64_encrypt_blocks (const unsigned char *in, + unsigned char *out, + size_t blocks, + const void *key, + const unsigned char ivec[16], + unsigned char cmac[16]); + +void aesni_ccm64_decrypt_blocks (const unsigned char *in, + unsigned char *out, + size_t blocks, + const void *key, + const unsigned char ivec[16], + unsigned char cmac[16]); + +static int aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + int ret, mode; + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) + { + ret = aesni_set_decrypt_key(key, ctx->key_len*8, ctx->cipher_data); + dat->block = (block128_f)aesni_decrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)aesni_cbc_encrypt : + NULL; + } + else { + ret = aesni_set_encrypt_key(key, ctx->key_len*8, ctx->cipher_data); + dat->block = (block128_f)aesni_encrypt; + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)aesni_cbc_encrypt; + else if (mode==EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; + else + dat->stream.cbc = NULL; + } + + if(ret < 0) + { + EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); + return 0; + } + + return 1; + } + +static int aesni_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len) +{ + aesni_cbc_encrypt(in,out,len,ctx->cipher_data,ctx->iv,ctx->encrypt); + + return 1; +} + +static int aesni_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = ctx->cipher->block_size; + + if (lencipher_data,ctx->encrypt); + + return 1; +} + +#define aesni_ofb_cipher aes_ofb_cipher +static int aesni_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aesni_cfb_cipher aes_cfb_cipher +static int aesni_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aesni_cfb8_cipher aes_cfb8_cipher +static int aesni_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aesni_cfb1_cipher aes_cfb1_cipher +static int aesni_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aesni_ctr_cipher aes_ctr_cipher +static int aesni_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) + { + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, + (block128_f)aesni_encrypt); + gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; + /* If we have an iv can set it directly, otherwise use + * saved IV. + */ + if (iv == NULL && gctx->iv_set) + iv = gctx->iv; + if (iv) + { + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + gctx->iv_set = 1; + } + gctx->key_set = 1; + } + else + { + /* If key set use IV, otherwise copy */ + if (gctx->key_set) + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + else + memcpy(gctx->iv, iv, gctx->ivlen); + gctx->iv_set = 1; + gctx->iv_gen = 0; + } + return 1; + } + +#define aesni_gcm_cipher aes_gcm_cipher +static int aesni_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; + if (!iv && !key) + return 1; + + if (key) + { + /* key_len is two AES keys */ + if (enc) + { + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)aesni_encrypt; + xctx->stream = aesni_xts_encrypt; + } + else + { + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)aesni_decrypt; + xctx->stream = aesni_xts_decrypt; + } + + aesni_set_encrypt_key(key + ctx->key_len/2, + ctx->key_len * 4, &xctx->ks2); + xctx->xts.block2 = (block128_f)aesni_encrypt; + + xctx->xts.key1 = &xctx->ks1; + } + + if (iv) + { + xctx->xts.key2 = &xctx->ks2; + memcpy(ctx->iv, iv, 16); + } + + return 1; + } + +#define aesni_xts_cipher aes_xts_cipher +static int aesni_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) + { + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f)aesni_encrypt); + cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks : + (ccm128_f)aesni_ccm64_decrypt_blocks; + cctx->key_set = 1; + } + if (iv) + { + memcpy(ctx->iv, iv, 15 - cctx->L); + cctx->iv_set = 1; + } + return 1; + } + +#define aesni_ccm_cipher aes_ccm_cipher +static int aesni_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER aesni_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aesni_init_key, \ + aesni_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_init_key, \ + aes_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } + +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +static const EVP_CIPHER aesni_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aesni_##mode##_init_key, \ + aesni_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_##mode##_init_key, \ + aes_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } + +#else + +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_init_key, \ + aes_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return &aes_##keylen##_##mode; } + +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_##mode##_init_key, \ + aes_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return &aes_##keylen##_##mode; } + +#endif + +#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) +#include "arm_arch.h" +#if __ARM_ARCH__>=7 +#define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +#endif +#endif + +#define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ + BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb1,cfb1,CFB,flags) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb8,cfb8,CFB,flags) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,ctr,ctr,CTR,flags) static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { - int ret; + int ret, mode; + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; - if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE - || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE - || enc) - ret=AES_set_encrypt_key(key, ctx->key_len * 8, ctx->cipher_data); + mode = ctx->cipher->flags & EVP_CIPH_MODE; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) +#ifdef BSAES_CAPABLE + if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) + { + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)AES_decrypt; + dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt; + } + else +#endif +#ifdef VPAES_CAPABLE + if (VPAES_CAPABLE) + { + ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)vpaes_decrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)vpaes_cbc_encrypt : + NULL; + } + else +#endif + { + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)AES_decrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)AES_cbc_encrypt : + NULL; + } else - ret=AES_set_decrypt_key(key, ctx->key_len * 8, ctx->cipher_data); +#ifdef BSAES_CAPABLE + if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) + { + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)AES_encrypt; + dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; + } + else +#endif +#ifdef VPAES_CAPABLE + if (VPAES_CAPABLE) + { + ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)vpaes_encrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)vpaes_cbc_encrypt : + NULL; + } + else +#endif + { + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)AES_encrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)AES_cbc_encrypt : + NULL; +#ifdef AES_CTR_ASM + if (mode==EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f)AES_ctr32_encrypt; +#endif + } if(ret < 0) { @@ -117,4 +579,752 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } +static int aes_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + if (dat->stream.cbc) + (*dat->stream.cbc)(in,out,len,&dat->ks,ctx->iv,ctx->encrypt); + else if (ctx->encrypt) + CRYPTO_cbc128_encrypt(in,out,len,&dat->ks,ctx->iv,dat->block); + else + CRYPTO_cbc128_encrypt(in,out,len,&dat->ks,ctx->iv,dat->block); + + return 1; +} + +static int aes_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = ctx->cipher->block_size; + size_t i; + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + if (lenblock)(in+i,out+i,&dat->ks); + + return 1; +} + +static int aes_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + CRYPTO_ofb128_encrypt(in,out,len,&dat->ks, + ctx->iv,&ctx->num,dat->block); + return 1; +} + +static int aes_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + CRYPTO_cfb128_encrypt(in,out,len,&dat->ks, + ctx->iv,&ctx->num,ctx->encrypt,dat->block); + return 1; +} + +static int aes_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + CRYPTO_cfb128_8_encrypt(in,out,len,&dat->ks, + ctx->iv,&ctx->num,ctx->encrypt,dat->block); + return 1; +} + +static int aes_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + if (ctx->flags&EVP_CIPH_FLAG_LENGTH_BITS) { + CRYPTO_cfb128_1_encrypt(in,out,len,&dat->ks, + ctx->iv,&ctx->num,ctx->encrypt,dat->block); + return 1; + } + + while (len>=MAXBITCHUNK) { + CRYPTO_cfb128_1_encrypt(in,out,MAXBITCHUNK*8,&dat->ks, + ctx->iv,&ctx->num,ctx->encrypt,dat->block); + len-=MAXBITCHUNK; + } + if (len) + CRYPTO_cfb128_1_encrypt(in,out,len*8,&dat->ks, + ctx->iv,&ctx->num,ctx->encrypt,dat->block); + + return 1; +} + +static int aes_ctr_cipher (EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + unsigned int num = ctx->num; + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; + + if (dat->stream.ctr) + CRYPTO_ctr128_encrypt_ctr32(in,out,len,&dat->ks, + ctx->iv,ctx->buf,&num,dat->stream.ctr); + else + CRYPTO_ctr128_encrypt(in,out,len,&dat->ks, + ctx->iv,ctx->buf,&num,dat->block); + ctx->num = (size_t)num; + return 1; +} + +BLOCK_CIPHER_generic_pack(NID_aes,128,EVP_CIPH_FLAG_FIPS) +BLOCK_CIPHER_generic_pack(NID_aes,192,EVP_CIPH_FLAG_FIPS) +BLOCK_CIPHER_generic_pack(NID_aes,256,EVP_CIPH_FLAG_FIPS) + +static int aes_gcm_cleanup(EVP_CIPHER_CTX *c) + { + EVP_AES_GCM_CTX *gctx = c->cipher_data; + OPENSSL_cleanse(&gctx->gcm, sizeof(gctx->gcm)); + if (gctx->iv != c->iv) + OPENSSL_free(gctx->iv); + return 1; + } + +/* increment counter (64-bit int) by 1 */ +static void ctr64_inc(unsigned char *counter) { + int n=8; + unsigned char c; + + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) + { + EVP_AES_GCM_CTX *gctx = c->cipher_data; + switch (type) + { + case EVP_CTRL_INIT: + gctx->key_set = 0; + gctx->iv_set = 0; + gctx->ivlen = c->cipher->iv_len; + gctx->iv = c->iv; + gctx->taglen = -1; + gctx->iv_gen = 0; + gctx->tls_aad_len = -1; + return 1; + + case EVP_CTRL_GCM_SET_IVLEN: + if (arg <= 0) + return 0; +#ifdef OPENSSL_FIPS + if (FIPS_module_mode() && !(c->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) + && arg < 12) + return 0; +#endif + /* Allocate memory for IV if needed */ + if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) + { + if (gctx->iv != c->iv) + OPENSSL_free(gctx->iv); + gctx->iv = OPENSSL_malloc(arg); + if (!gctx->iv) + return 0; + } + gctx->ivlen = arg; + return 1; + + case EVP_CTRL_GCM_SET_TAG: + if (arg <= 0 || arg > 16 || c->encrypt) + return 0; + memcpy(c->buf, ptr, arg); + gctx->taglen = arg; + return 1; + + case EVP_CTRL_GCM_GET_TAG: + if (arg <= 0 || arg > 16 || !c->encrypt || gctx->taglen < 0) + return 0; + memcpy(ptr, c->buf, arg); + return 1; + + case EVP_CTRL_GCM_SET_IV_FIXED: + /* Special case: -1 length restores whole IV */ + if (arg == -1) + { + memcpy(gctx->iv, ptr, gctx->ivlen); + gctx->iv_gen = 1; + return 1; + } + /* Fixed field must be at least 4 bytes and invocation field + * at least 8. + */ + if ((arg < 4) || (gctx->ivlen - arg) < 8) + return 0; + if (arg) + memcpy(gctx->iv, ptr, arg); + if (c->encrypt && + RAND_bytes(gctx->iv + arg, gctx->ivlen - arg) <= 0) + return 0; + gctx->iv_gen = 1; + return 1; + + case EVP_CTRL_GCM_IV_GEN: + if (gctx->iv_gen == 0 || gctx->key_set == 0) + return 0; + CRYPTO_gcm128_setiv(&gctx->gcm, gctx->iv, gctx->ivlen); + if (arg <= 0 || arg > gctx->ivlen) + arg = gctx->ivlen; + memcpy(ptr, gctx->iv + gctx->ivlen - arg, arg); + /* Invocation field will be at least 8 bytes in size and + * so no need to check wrap around or increment more than + * last 8 bytes. + */ + ctr64_inc(gctx->iv + gctx->ivlen - 8); + gctx->iv_set = 1; + return 1; + + case EVP_CTRL_GCM_SET_IV_INV: + if (gctx->iv_gen == 0 || gctx->key_set == 0 || c->encrypt) + return 0; + memcpy(gctx->iv + gctx->ivlen - arg, ptr, arg); + CRYPTO_gcm128_setiv(&gctx->gcm, gctx->iv, gctx->ivlen); + gctx->iv_set = 1; + return 1; + + case EVP_CTRL_AEAD_TLS1_AAD: + /* Save the AAD for later use */ + if (arg != 13) + return 0; + memcpy(c->buf, ptr, arg); + gctx->tls_aad_len = arg; + { + unsigned int len=c->buf[arg-2]<<8|c->buf[arg-1]; + /* Correct length for explicit IV */ + len -= EVP_GCM_TLS_EXPLICIT_IV_LEN; + /* If decrypting correct for tag too */ + if (!c->encrypt) + len -= EVP_GCM_TLS_TAG_LEN; + c->buf[arg-2] = len>>8; + c->buf[arg-1] = len & 0xff; + } + /* Extra padding: tag appended to record */ + return EVP_GCM_TLS_TAG_LEN; + + default: + return -1; + + } + } + +static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) + { do { +#ifdef BSAES_CAPABLE + if (BSAES_CAPABLE) + { + AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); + CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, + (block128_f)AES_encrypt); + gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; + break; + } + else +#endif +#ifdef VPAES_CAPABLE + if (VPAES_CAPABLE) + { + vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); + CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, + (block128_f)vpaes_encrypt); + gctx->ctr = NULL; + break; + } + else +#endif + (void)0; /* terminate potentially open 'else' */ + + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt); +#ifdef AES_CTR_ASM + gctx->ctr = (ctr128_f)AES_ctr32_encrypt; +#else + gctx->ctr = NULL; +#endif + } while (0); + + /* If we have an iv can set it directly, otherwise use + * saved IV. + */ + if (iv == NULL && gctx->iv_set) + iv = gctx->iv; + if (iv) + { + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + gctx->iv_set = 1; + } + gctx->key_set = 1; + } + else + { + /* If key set use IV, otherwise copy */ + if (gctx->key_set) + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + else + memcpy(gctx->iv, iv, gctx->ivlen); + gctx->iv_set = 1; + gctx->iv_gen = 0; + } + return 1; + } + +/* Handle TLS GCM packet format. This consists of the last portion of the IV + * followed by the payload and finally the tag. On encrypt generate IV, + * encrypt payload and write the tag. On verify retrieve IV, decrypt payload + * and verify tag. + */ + +static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + int rv = -1; + /* Encrypt/decrypt must be performed in place */ + if (out != in || len < (EVP_GCM_TLS_EXPLICIT_IV_LEN+EVP_GCM_TLS_TAG_LEN)) + return -1; + /* Set IV from start of buffer or generate IV and write to start + * of buffer. + */ + if (EVP_CIPHER_CTX_ctrl(ctx, ctx->encrypt ? + EVP_CTRL_GCM_IV_GEN : EVP_CTRL_GCM_SET_IV_INV, + EVP_GCM_TLS_EXPLICIT_IV_LEN, out) <= 0) + goto err; + /* Use saved AAD */ + if (CRYPTO_gcm128_aad(&gctx->gcm, ctx->buf, gctx->tls_aad_len)) + goto err; + /* Fix buffer and length to point to payload */ + in += EVP_GCM_TLS_EXPLICIT_IV_LEN; + out += EVP_GCM_TLS_EXPLICIT_IV_LEN; + len -= EVP_GCM_TLS_EXPLICIT_IV_LEN + EVP_GCM_TLS_TAG_LEN; + if (ctx->encrypt) + { + /* Encrypt payload */ + if (gctx->ctr) + { + if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, + in, out, len, + gctx->ctr)) + goto err; + } + else { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + goto err; + } + out += len; + /* Finally write tag */ + CRYPTO_gcm128_tag(&gctx->gcm, out, EVP_GCM_TLS_TAG_LEN); + rv = len + EVP_GCM_TLS_EXPLICIT_IV_LEN + EVP_GCM_TLS_TAG_LEN; + } + else + { + /* Decrypt */ + if (gctx->ctr) + { + if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, + in, out, len, + gctx->ctr)) + goto err; + } + else { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + goto err; + } + /* Retrieve tag */ + CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, + EVP_GCM_TLS_TAG_LEN); + /* If tag mismatch wipe buffer */ + if (memcmp(ctx->buf, in + len, EVP_GCM_TLS_TAG_LEN)) + { + OPENSSL_cleanse(out, len); + goto err; + } + rv = len; + } + + err: + gctx->iv_set = 0; + gctx->tls_aad_len = -1; + return rv; + } + +static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + /* If not set up, return error */ + if (!gctx->key_set) + return -1; + + if (gctx->tls_aad_len >= 0) + return aes_gcm_tls_cipher(ctx, out, in, len); + + if (!gctx->iv_set) + return -1; + if (in) + { + if (out == NULL) + { + if (CRYPTO_gcm128_aad(&gctx->gcm, in, len)) + return -1; + } + else if (ctx->encrypt) + { + if (gctx->ctr) + { + if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, + in, out, len, + gctx->ctr)) + return -1; + } + else { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + return -1; + } + } + else + { + if (gctx->ctr) + { + if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, + in, out, len, + gctx->ctr)) + return -1; + } + else { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + return -1; + } + } + return len; + } + else + { + if (!ctx->encrypt) + { + if (gctx->taglen < 0) + return -1; + if (CRYPTO_gcm128_finish(&gctx->gcm, + ctx->buf, gctx->taglen) != 0) + return -1; + gctx->iv_set = 0; + return 0; + } + CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, 16); + gctx->taglen = 16; + /* Don't reuse the IV */ + gctx->iv_set = 0; + return 0; + } + + } + +#define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ + | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT) + +BLOCK_CIPHER_custom(NID_aes,128,1,12,gcm,GCM, + EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_AEAD_CIPHER|CUSTOM_FLAGS) +BLOCK_CIPHER_custom(NID_aes,192,1,12,gcm,GCM, + EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_AEAD_CIPHER|CUSTOM_FLAGS) +BLOCK_CIPHER_custom(NID_aes,256,1,12,gcm,GCM, + EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_AEAD_CIPHER|CUSTOM_FLAGS) + +static int aes_xts_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) + { + EVP_AES_XTS_CTX *xctx = c->cipher_data; + if (type != EVP_CTRL_INIT) + return -1; + /* key1 and key2 are used as an indicator both key and IV are set */ + xctx->xts.key1 = NULL; + xctx->xts.key2 = NULL; + return 1; + } + +static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; + if (!iv && !key) + return 1; + + if (key) do + { +#ifdef AES_XTS_ASM + xctx->stream = enc ? AES_xts_encrypt : AES_xts_decrypt; +#else + xctx->stream = NULL; +#endif + /* key_len is two AES keys */ +#if !(defined(__arm__) || defined(__arm)) /* not yet? */ +#ifdef BSAES_CAPABLE + if (BSAES_CAPABLE) + xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt; + else +#endif +#endif +#ifdef VPAES_CAPABLE + if (VPAES_CAPABLE) + { + if (enc) + { + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)vpaes_encrypt; + } + else + { + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)vpaes_decrypt; + } + + vpaes_set_encrypt_key(key + ctx->key_len/2, + ctx->key_len * 4, &xctx->ks2); + xctx->xts.block2 = (block128_f)vpaes_encrypt; + + xctx->xts.key1 = &xctx->ks1; + break; + } + else +#endif + (void)0; /* terminate potentially open 'else' */ + + if (enc) + { + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)AES_encrypt; + } + else + { + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)AES_decrypt; + } + + AES_set_encrypt_key(key + ctx->key_len/2, + ctx->key_len * 4, &xctx->ks2); + xctx->xts.block2 = (block128_f)AES_encrypt; + + xctx->xts.key1 = &xctx->ks1; + } while (0); + + if (iv) + { + xctx->xts.key2 = &xctx->ks2; + memcpy(ctx->iv, iv, 16); + } + + return 1; + } + +static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; + if (!xctx->xts.key1 || !xctx->xts.key2) + return 0; + if (!out || !in || lenflags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) && + (len > (1UL<<20)*16)) + { + EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE); + return 0; + } +#endif + if (xctx->stream) + (*xctx->stream)(in, out, len, + xctx->xts.key1, xctx->xts.key2, ctx->iv); + else if (CRYPTO_xts128_encrypt(&xctx->xts, ctx->iv, in, out, len, + ctx->encrypt)) + return 0; + return 1; + } + +#define aes_xts_cleanup NULL + +#define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT) + +BLOCK_CIPHER_custom(NID_aes,128,1,16,xts,XTS,EVP_CIPH_FLAG_FIPS|XTS_FLAGS) +BLOCK_CIPHER_custom(NID_aes,256,1,16,xts,XTS,EVP_CIPH_FLAG_FIPS|XTS_FLAGS) + +static int aes_ccm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) + { + EVP_AES_CCM_CTX *cctx = c->cipher_data; + switch (type) + { + case EVP_CTRL_INIT: + cctx->key_set = 0; + cctx->iv_set = 0; + cctx->L = 8; + cctx->M = 12; + cctx->tag_set = 0; + cctx->len_set = 0; + return 1; + + case EVP_CTRL_CCM_SET_IVLEN: + arg = 15 - arg; + case EVP_CTRL_CCM_SET_L: + if (arg < 2 || arg > 8) + return 0; + cctx->L = arg; + return 1; + + case EVP_CTRL_CCM_SET_TAG: + if ((arg & 1) || arg < 4 || arg > 16) + return 0; + if ((c->encrypt && ptr) || (!c->encrypt && !ptr)) + return 0; + if (ptr) + { + cctx->tag_set = 1; + memcpy(c->buf, ptr, arg); + } + cctx->M = arg; + return 1; + + case EVP_CTRL_CCM_GET_TAG: + if (!c->encrypt || !cctx->tag_set) + return 0; + if(!CRYPTO_ccm128_tag(&cctx->ccm, ptr, (size_t)arg)) + return 0; + cctx->tag_set = 0; + cctx->iv_set = 0; + cctx->len_set = 0; + return 1; + + default: + return -1; + + } + } + +static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) do + { +#ifdef VPAES_CAPABLE + if (VPAES_CAPABLE) + { + vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks); + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f)vpaes_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + break; + } +#endif + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f)AES_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + } while (0); + if (iv) + { + memcpy(ctx->iv, iv, 15 - cctx->L); + cctx->iv_set = 1; + } + return 1; + } + +static int aes_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; + CCM128_CONTEXT *ccm = &cctx->ccm; + /* If not set up, return error */ + if (!cctx->iv_set && !cctx->key_set) + return -1; + if (!ctx->encrypt && !cctx->tag_set) + return -1; + if (!out) + { + if (!in) + { + if (CRYPTO_ccm128_setiv(ccm, ctx->iv, 15 - cctx->L,len)) + return -1; + cctx->len_set = 1; + return len; + } + /* If have AAD need message length */ + if (!cctx->len_set && len) + return -1; + CRYPTO_ccm128_aad(ccm, in, len); + return len; + } + /* EVP_*Final() doesn't return any data */ + if (!in) + return 0; + /* If not set length yet do it */ + if (!cctx->len_set) + { + if (CRYPTO_ccm128_setiv(ccm, ctx->iv, 15 - cctx->L, len)) + return -1; + cctx->len_set = 1; + } + if (ctx->encrypt) + { + if (cctx->str ? CRYPTO_ccm128_encrypt_ccm64(ccm, in, out, len, + cctx->str) : + CRYPTO_ccm128_encrypt(ccm, in, out, len)) + return -1; + cctx->tag_set = 1; + return len; + } + else + { + int rv = -1; + if (cctx->str ? !CRYPTO_ccm128_decrypt_ccm64(ccm, in, out, len, + cctx->str) : + !CRYPTO_ccm128_decrypt(ccm, in, out, len)) + { + unsigned char tag[16]; + if (CRYPTO_ccm128_tag(ccm, tag, cctx->M)) + { + if (!memcmp(tag, ctx->buf, cctx->M)) + rv = len; + } + } + if (rv == -1) + OPENSSL_cleanse(out, len); + cctx->iv_set = 0; + cctx->tag_set = 0; + cctx->len_set = 0; + return rv; + } + + } + +#define aes_ccm_cleanup NULL + +BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) +BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) +BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) + +#endif #endif diff --git a/app/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c b/app/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c new file mode 100644 index 00000000..fb2c884a --- /dev/null +++ b/app/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -0,0 +1,581 @@ +/* ==================================================================== + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include + +#include +#include + +#if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA1) + +#include +#include +#include +#include +#include "evp_locl.h" + +#ifndef EVP_CIPH_FLAG_AEAD_CIPHER +#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 +#define EVP_CTRL_AEAD_TLS1_AAD 0x16 +#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +#endif + +#if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1) +#define EVP_CIPH_FLAG_DEFAULT_ASN1 0 +#endif + +#define TLS1_1_VERSION 0x0302 + +typedef struct + { + AES_KEY ks; + SHA_CTX head,tail,md; + size_t payload_length; /* AAD length in decrypt case */ + union { + unsigned int tls_ver; + unsigned char tls_aad[16]; /* 13 used */ + } aux; + } EVP_AES_HMAC_SHA1; + +#define NO_PAYLOAD_LENGTH ((size_t)-1) + +#if defined(AES_ASM) && ( \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) + +#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC) +# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) +#endif + +extern unsigned int OPENSSL_ia32cap_P[2]; +#define AESNI_CAPABLE (1<<(57-32)) + +int aesni_set_encrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); +int aesni_set_decrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); + +void aesni_cbc_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, + unsigned char *ivec, int enc); + +void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA_CTX *ctx,const void *in0); + +#define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data) + +static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, + const unsigned char *inkey, + const unsigned char *iv, int enc) + { + EVP_AES_HMAC_SHA1 *key = data(ctx); + int ret; + + if (enc) + ret=aesni_set_encrypt_key(inkey,ctx->key_len*8,&key->ks); + else + ret=aesni_set_decrypt_key(inkey,ctx->key_len*8,&key->ks); + + SHA1_Init(&key->head); /* handy when benchmarking */ + key->tail = key->head; + key->md = key->head; + + key->payload_length = NO_PAYLOAD_LENGTH; + + return ret<0?0:1; + } + +#define STITCHED_CALL + +#if !defined(STITCHED_CALL) +#define aes_off 0 +#endif + +void sha1_block_data_order (void *c,const void *p,size_t len); + +static void sha1_update(SHA_CTX *c,const void *data,size_t len) +{ const unsigned char *ptr = data; + size_t res; + + if ((res = c->num)) { + res = SHA_CBLOCK-res; + if (lenNh += len>>29; + c->Nl += len<<=3; + if (c->Nl<(unsigned int)len) c->Nh++; + } + + if (res) + SHA1_Update(c,ptr,res); +} + +#ifdef SHA1_Update +#undef SHA1_Update +#endif +#define SHA1_Update sha1_update + +static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_AES_HMAC_SHA1 *key = data(ctx); + unsigned int l; + size_t plen = key->payload_length, + iv = 0, /* explicit IV in TLS 1.1 and later */ + sha_off = 0; +#if defined(STITCHED_CALL) + size_t aes_off = 0, + blocks; + + sha_off = SHA_CBLOCK-key->md.num; +#endif + + key->payload_length = NO_PAYLOAD_LENGTH; + + if (len%AES_BLOCK_SIZE) return 0; + + if (ctx->encrypt) { + if (plen==NO_PAYLOAD_LENGTH) + plen = len; + else if (len!=((plen+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE)) + return 0; + else if (key->aux.tls_ver >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + +#if defined(STITCHED_CALL) + if (plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA_CBLOCK)) { + SHA1_Update(&key->md,in+iv,sha_off); + + aesni_cbc_sha1_enc(in,out,blocks,&key->ks, + ctx->iv,&key->md,in+iv+sha_off); + blocks *= SHA_CBLOCK; + aes_off += blocks; + sha_off += blocks; + key->md.Nh += blocks>>29; + key->md.Nl += blocks<<=3; + if (key->md.Nl<(unsigned int)blocks) key->md.Nh++; + } else { + sha_off = 0; + } +#endif + sha_off += iv; + SHA1_Update(&key->md,in+sha_off,plen-sha_off); + + if (plen!=len) { /* "TLS" mode of operation */ + if (in!=out) + memcpy(out+aes_off,in+aes_off,plen-aes_off); + + /* calculate HMAC and append it to payload */ + SHA1_Final(out+plen,&key->md); + key->md = key->tail; + SHA1_Update(&key->md,out+plen,SHA_DIGEST_LENGTH); + SHA1_Final(out+plen,&key->md); + + /* pad the payload|hmac */ + plen += SHA_DIGEST_LENGTH; + for (l=len-plen-1;plenks,ctx->iv,1); + } else { + aesni_cbc_encrypt(in+aes_off,out+aes_off,len-aes_off, + &key->ks,ctx->iv,1); + } + } else { + union { unsigned int u[SHA_DIGEST_LENGTH/sizeof(unsigned int)]; + unsigned char c[32+SHA_DIGEST_LENGTH]; } mac, *pmac; + + /* arrange cache line alignment */ + pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32)); + + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in,out,len, + &key->ks,ctx->iv,0); + + if (plen) { /* "TLS" mode of operation */ + size_t inp_len, mask, j, i; + unsigned int res, maxpad, pad, bitlen; + int ret = 1; + union { unsigned int u[SHA_LBLOCK]; + unsigned char c[SHA_CBLOCK]; } + *data = (void *)key->md.data; + + if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3]) + >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + + if (len<(iv+SHA_DIGEST_LENGTH+1)) + return 0; + + /* omit explicit iv */ + out += iv; + len -= iv; + + /* figure out payload length */ + pad = out[len-1]; + maxpad = len-(SHA_DIGEST_LENGTH+1); + maxpad |= (255-maxpad)>>(sizeof(maxpad)*8-8); + maxpad &= 255; + + inp_len = len - (SHA_DIGEST_LENGTH+pad+1); + mask = (0-((inp_len-len)>>(sizeof(inp_len)*8-1))); + inp_len &= mask; + ret &= (int)mask; + + key->aux.tls_aad[plen-2] = inp_len>>8; + key->aux.tls_aad[plen-1] = inp_len; + + /* calculate HMAC */ + key->md = key->head; + SHA1_Update(&key->md,key->aux.tls_aad,plen); + +#if 1 + len -= SHA_DIGEST_LENGTH; /* amend mac */ + if (len>=(256+SHA_CBLOCK)) { + j = (len-(256+SHA_CBLOCK))&(0-SHA_CBLOCK); + j += SHA_CBLOCK-key->md.num; + SHA1_Update(&key->md,out,j); + out += j; + len -= j; + inp_len -= j; + } + + /* but pretend as if we hashed padded payload */ + bitlen = key->md.Nl+(inp_len<<3); /* at most 18 bits */ +#ifdef BSWAP + bitlen = BSWAP(bitlen); +#else + mac.c[0] = 0; + mac.c[1] = (unsigned char)(bitlen>>16); + mac.c[2] = (unsigned char)(bitlen>>8); + mac.c[3] = (unsigned char)bitlen; + bitlen = mac.u[0]; +#endif + + pmac->u[0]=0; + pmac->u[1]=0; + pmac->u[2]=0; + pmac->u[3]=0; + pmac->u[4]=0; + + for (res=key->md.num, j=0;j>(sizeof(j)*8-8); + c &= mask; + c |= 0x80&~mask&~((inp_len-j)>>(sizeof(j)*8-8)); + data->c[res++]=(unsigned char)c; + + if (res!=SHA_CBLOCK) continue; + + /* j is not incremented yet */ + mask = 0-((inp_len+7-j)>>(sizeof(j)*8-1)); + data->u[SHA_LBLOCK-1] |= bitlen&mask; + sha1_block_data_order(&key->md,data,1); + mask &= 0-((j-inp_len-72)>>(sizeof(j)*8-1)); + pmac->u[0] |= key->md.h0 & mask; + pmac->u[1] |= key->md.h1 & mask; + pmac->u[2] |= key->md.h2 & mask; + pmac->u[3] |= key->md.h3 & mask; + pmac->u[4] |= key->md.h4 & mask; + res=0; + } + + for(i=res;ic[i]=0; + + if (res>SHA_CBLOCK-8) { + mask = 0-((inp_len+8-j)>>(sizeof(j)*8-1)); + data->u[SHA_LBLOCK-1] |= bitlen&mask; + sha1_block_data_order(&key->md,data,1); + mask &= 0-((j-inp_len-73)>>(sizeof(j)*8-1)); + pmac->u[0] |= key->md.h0 & mask; + pmac->u[1] |= key->md.h1 & mask; + pmac->u[2] |= key->md.h2 & mask; + pmac->u[3] |= key->md.h3 & mask; + pmac->u[4] |= key->md.h4 & mask; + + memset(data,0,SHA_CBLOCK); + j+=64; + } + data->u[SHA_LBLOCK-1] = bitlen; + sha1_block_data_order(&key->md,data,1); + mask = 0-((j-inp_len-73)>>(sizeof(j)*8-1)); + pmac->u[0] |= key->md.h0 & mask; + pmac->u[1] |= key->md.h1 & mask; + pmac->u[2] |= key->md.h2 & mask; + pmac->u[3] |= key->md.h3 & mask; + pmac->u[4] |= key->md.h4 & mask; + +#ifdef BSWAP + pmac->u[0] = BSWAP(pmac->u[0]); + pmac->u[1] = BSWAP(pmac->u[1]); + pmac->u[2] = BSWAP(pmac->u[2]); + pmac->u[3] = BSWAP(pmac->u[3]); + pmac->u[4] = BSWAP(pmac->u[4]); +#else + for (i=0;i<5;i++) { + res = pmac->u[i]; + pmac->c[4*i+0]=(unsigned char)(res>>24); + pmac->c[4*i+1]=(unsigned char)(res>>16); + pmac->c[4*i+2]=(unsigned char)(res>>8); + pmac->c[4*i+3]=(unsigned char)res; + } +#endif + len += SHA_DIGEST_LENGTH; +#else + SHA1_Update(&key->md,out,inp_len); + res = key->md.num; + SHA1_Final(pmac->c,&key->md); + + { + unsigned int inp_blocks, pad_blocks; + + /* but pretend as if we hashed padded payload */ + inp_blocks = 1+((SHA_CBLOCK-9-res)>>(sizeof(res)*8-1)); + res += (unsigned int)(len-inp_len); + pad_blocks = res / SHA_CBLOCK; + res %= SHA_CBLOCK; + pad_blocks += 1+((SHA_CBLOCK-9-res)>>(sizeof(res)*8-1)); + for (;inp_blocksmd,data,1); + } +#endif + key->md = key->tail; + SHA1_Update(&key->md,pmac->c,SHA_DIGEST_LENGTH); + SHA1_Final(pmac->c,&key->md); + + /* verify HMAC */ + out += inp_len; + len -= inp_len; +#if 1 + { + unsigned char *p = out+len-1-maxpad-SHA_DIGEST_LENGTH; + size_t off = out-p; + unsigned int c, cmask; + + maxpad += SHA_DIGEST_LENGTH; + for (res=0,i=0,j=0;j>(sizeof(int)*8-1); + res |= (c^pad)&~cmask; /* ... and padding */ + cmask &= ((int)(off-1-j))>>(sizeof(int)*8-1); + res |= (c^pmac->c[i])&cmask; + i += 1&cmask; + } + maxpad -= SHA_DIGEST_LENGTH; + + res = 0-((0-res)>>(sizeof(res)*8-1)); + ret &= (int)~res; + } +#else + for (res=0,i=0;ic[i]; + res = 0-((0-res)>>(sizeof(res)*8-1)); + ret &= (int)~res; + + /* verify padding */ + pad = (pad&~res) | (maxpad&res); + out = out+len-1-pad; + for (res=0,i=0;i>(sizeof(res)*8-1); + ret &= (int)~res; +#endif + return ret; + } else { + SHA1_Update(&key->md,out,len); + } + } + + return 1; + } + +static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr) + { + EVP_AES_HMAC_SHA1 *key = data(ctx); + + switch (type) + { + case EVP_CTRL_AEAD_SET_MAC_KEY: + { + unsigned int i; + unsigned char hmac_key[64]; + + memset (hmac_key,0,sizeof(hmac_key)); + + if (arg > (int)sizeof(hmac_key)) { + SHA1_Init(&key->head); + SHA1_Update(&key->head,ptr,arg); + SHA1_Final(hmac_key,&key->head); + } else { + memcpy(hmac_key,ptr,arg); + } + + for (i=0;ihead); + SHA1_Update(&key->head,hmac_key,sizeof(hmac_key)); + + for (i=0;itail); + SHA1_Update(&key->tail,hmac_key,sizeof(hmac_key)); + + OPENSSL_cleanse(hmac_key,sizeof(hmac_key)); + + return 1; + } + case EVP_CTRL_AEAD_TLS1_AAD: + { + unsigned char *p=ptr; + unsigned int len=p[arg-2]<<8|p[arg-1]; + + if (ctx->encrypt) + { + key->payload_length = len; + if ((key->aux.tls_ver=p[arg-4]<<8|p[arg-3]) >= TLS1_1_VERSION) { + len -= AES_BLOCK_SIZE; + p[arg-2] = len>>8; + p[arg-1] = len; + } + key->md = key->head; + SHA1_Update(&key->md,p,arg); + + return (int)(((len+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE) + - len); + } + else + { + if (arg>13) arg = 13; + memcpy(key->aux.tls_aad,ptr,arg); + key->payload_length = arg; + + return SHA_DIGEST_LENGTH; + } + } + default: + return -1; + } + } + +static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher = + { +#ifdef NID_aes_128_cbc_hmac_sha1 + NID_aes_128_cbc_hmac_sha1, +#else + NID_undef, +#endif + 16,16,16, + EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER, + aesni_cbc_hmac_sha1_init_key, + aesni_cbc_hmac_sha1_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA1), + EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha1_ctrl, + NULL + }; + +static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = + { +#ifdef NID_aes_256_cbc_hmac_sha1 + NID_aes_256_cbc_hmac_sha1, +#else + NID_undef, +#endif + 16,32,16, + EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER, + aesni_cbc_hmac_sha1_init_key, + aesni_cbc_hmac_sha1_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA1), + EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha1_ctrl, + NULL + }; + +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void) + { + return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE? + &aesni_128_cbc_hmac_sha1_cipher:NULL); + } + +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void) + { + return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE? + &aesni_256_cbc_hmac_sha1_cipher:NULL); + } +#else +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void) + { + return NULL; + } +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void) + { + return NULL; + } +#endif +#endif diff --git a/app/openssl/crypto/evp/e_des3.c b/app/openssl/crypto/evp/e_des3.c index 3232cfe0..8d7b7de2 100644 --- a/app/openssl/crypto/evp/e_des3.c +++ b/app/openssl/crypto/evp/e_des3.c @@ -65,6 +65,8 @@ #include #include +#ifndef OPENSSL_FIPS + static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv,int enc); @@ -99,7 +101,7 @@ static int des_ede_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { - if (inl>=EVP_MAXCHUNK) + while (inl>=EVP_MAXCHUNK) { DES_ede3_ofb64_encrypt(in, out, (long)EVP_MAXCHUNK, &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, @@ -130,7 +132,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, printf("\n"); } #endif /* KSSL_DEBUG */ - if (inl>=EVP_MAXCHUNK) + while (inl>=EVP_MAXCHUNK) { DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, @@ -149,7 +151,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_ede_cfb64_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { - if (inl>=EVP_MAXCHUNK) + while (inl>=EVP_MAXCHUNK) { DES_ede3_cfb64_encrypt(in, out, (long)EVP_MAXCHUNK, &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, @@ -311,3 +313,4 @@ const EVP_CIPHER *EVP_des_ede3(void) return &des_ede3_ecb; } #endif +#endif diff --git a/app/openssl/crypto/evp/e_null.c b/app/openssl/crypto/evp/e_null.c index 7cf50e14..f0c1f78b 100644 --- a/app/openssl/crypto/evp/e_null.c +++ b/app/openssl/crypto/evp/e_null.c @@ -61,6 +61,8 @@ #include #include +#ifndef OPENSSL_FIPS + static int null_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv,int enc); static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, @@ -99,4 +101,4 @@ static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, memcpy((char *)out,(const char *)in,inl); return 1; } - +#endif diff --git a/app/openssl/crypto/evp/e_rc2.c b/app/openssl/crypto/evp/e_rc2.c index f78d7811..d4c33b58 100644 --- a/app/openssl/crypto/evp/e_rc2.c +++ b/app/openssl/crypto/evp/e_rc2.c @@ -183,7 +183,8 @@ static int rc2_get_asn1_type_and_iv(EVP_CIPHER_CTX *c, ASN1_TYPE *type) key_bits =rc2_magic_to_meth((int)num); if (!key_bits) return(-1); - if(i > 0) EVP_CipherInit_ex(c, NULL, NULL, NULL, iv, -1); + if(i > 0 && !EVP_CipherInit_ex(c, NULL, NULL, NULL, iv, -1)) + return -1; EVP_CIPHER_CTX_ctrl(c, EVP_CTRL_SET_RC2_KEY_BITS, key_bits, NULL); EVP_CIPHER_CTX_set_key_length(c, key_bits / 8); } diff --git a/app/openssl/crypto/evp/e_rc4.c b/app/openssl/crypto/evp/e_rc4.c index 8b5175e0..b4f6bda8 100644 --- a/app/openssl/crypto/evp/e_rc4.c +++ b/app/openssl/crypto/evp/e_rc4.c @@ -62,6 +62,7 @@ #ifndef OPENSSL_NO_RC4 #include +#include "evp_locl.h" #include #include diff --git a/app/openssl/crypto/evp/e_rc4_hmac_md5.c b/app/openssl/crypto/evp/e_rc4_hmac_md5.c new file mode 100644 index 00000000..56563191 --- /dev/null +++ b/app/openssl/crypto/evp/e_rc4_hmac_md5.c @@ -0,0 +1,298 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include + +#include +#include + +#if !defined(OPENSSL_NO_RC4) && !defined(OPENSSL_NO_MD5) + +#include +#include +#include +#include + +#ifndef EVP_CIPH_FLAG_AEAD_CIPHER +#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 +#define EVP_CTRL_AEAD_TLS1_AAD 0x16 +#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +#endif + +/* FIXME: surely this is available elsewhere? */ +#define EVP_RC4_KEY_SIZE 16 + +typedef struct + { + RC4_KEY ks; + MD5_CTX head,tail,md; + size_t payload_length; + } EVP_RC4_HMAC_MD5; + +#define NO_PAYLOAD_LENGTH ((size_t)-1) + +void rc4_md5_enc (RC4_KEY *key, const void *in0, void *out, + MD5_CTX *ctx,const void *inp,size_t blocks); + +#define data(ctx) ((EVP_RC4_HMAC_MD5 *)(ctx)->cipher_data) + +static int rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, + const unsigned char *inkey, + const unsigned char *iv, int enc) + { + EVP_RC4_HMAC_MD5 *key = data(ctx); + + RC4_set_key(&key->ks,EVP_CIPHER_CTX_key_length(ctx), + inkey); + + MD5_Init(&key->head); /* handy when benchmarking */ + key->tail = key->head; + key->md = key->head; + + key->payload_length = NO_PAYLOAD_LENGTH; + + return 1; + } + +#if !defined(OPENSSL_NO_ASM) && ( \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) && \ + !(defined(__APPLE__) && defined(__MACH__)) +#define STITCHED_CALL +#endif + +#if !defined(STITCHED_CALL) +#define rc4_off 0 +#define md5_off 0 +#endif + +static int rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { + EVP_RC4_HMAC_MD5 *key = data(ctx); +#if defined(STITCHED_CALL) + size_t rc4_off = 32-1-(key->ks.x&(32-1)), /* 32 is $MOD from rc4_md5-x86_64.pl */ + md5_off = MD5_CBLOCK-key->md.num, + blocks; + unsigned int l; + extern unsigned int OPENSSL_ia32cap_P[]; +#endif + size_t plen = key->payload_length; + + if (plen!=NO_PAYLOAD_LENGTH && len!=(plen+MD5_DIGEST_LENGTH)) return 0; + + if (ctx->encrypt) { + if (plen==NO_PAYLOAD_LENGTH) plen = len; +#if defined(STITCHED_CALL) + /* cipher has to "fall behind" */ + if (rc4_off>md5_off) md5_off+=MD5_CBLOCK; + + if (plen>md5_off && (blocks=(plen-md5_off)/MD5_CBLOCK) && + (OPENSSL_ia32cap_P[0]&(1<<20))==0) { + MD5_Update(&key->md,in,md5_off); + RC4(&key->ks,rc4_off,in,out); + + rc4_md5_enc(&key->ks,in+rc4_off,out+rc4_off, + &key->md,in+md5_off,blocks); + blocks *= MD5_CBLOCK; + rc4_off += blocks; + md5_off += blocks; + key->md.Nh += blocks>>29; + key->md.Nl += blocks<<=3; + if (key->md.Nl<(unsigned int)blocks) key->md.Nh++; + } else { + rc4_off = 0; + md5_off = 0; + } +#endif + MD5_Update(&key->md,in+md5_off,plen-md5_off); + + if (plen!=len) { /* "TLS" mode of operation */ + if (in!=out) + memcpy(out+rc4_off,in+rc4_off,plen-rc4_off); + + /* calculate HMAC and append it to payload */ + MD5_Final(out+plen,&key->md); + key->md = key->tail; + MD5_Update(&key->md,out+plen,MD5_DIGEST_LENGTH); + MD5_Final(out+plen,&key->md); + /* encrypt HMAC at once */ + RC4(&key->ks,len-rc4_off,out+rc4_off,out+rc4_off); + } else { + RC4(&key->ks,len-rc4_off,in+rc4_off,out+rc4_off); + } + } else { + unsigned char mac[MD5_DIGEST_LENGTH]; +#if defined(STITCHED_CALL) + /* digest has to "fall behind" */ + if (md5_off>rc4_off) rc4_off += 2*MD5_CBLOCK; + else rc4_off += MD5_CBLOCK; + + if (len>rc4_off && (blocks=(len-rc4_off)/MD5_CBLOCK) && + (OPENSSL_ia32cap_P[0]&(1<<20))==0) { + RC4(&key->ks,rc4_off,in,out); + MD5_Update(&key->md,out,md5_off); + + rc4_md5_enc(&key->ks,in+rc4_off,out+rc4_off, + &key->md,out+md5_off,blocks); + blocks *= MD5_CBLOCK; + rc4_off += blocks; + md5_off += blocks; + l = (key->md.Nl+(blocks<<3))&0xffffffffU; + if (lmd.Nl) key->md.Nh++; + key->md.Nl = l; + key->md.Nh += blocks>>29; + } else { + md5_off=0; + rc4_off=0; + } +#endif + /* decrypt HMAC at once */ + RC4(&key->ks,len-rc4_off,in+rc4_off,out+rc4_off); + if (plen!=NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ + MD5_Update(&key->md,out+md5_off,plen-md5_off); + + /* calculate HMAC and verify it */ + MD5_Final(mac,&key->md); + key->md = key->tail; + MD5_Update(&key->md,mac,MD5_DIGEST_LENGTH); + MD5_Final(mac,&key->md); + + if (memcmp(out+plen,mac,MD5_DIGEST_LENGTH)) + return 0; + } else { + MD5_Update(&key->md,out+md5_off,len-md5_off); + } + } + + key->payload_length = NO_PAYLOAD_LENGTH; + + return 1; + } + +static int rc4_hmac_md5_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr) + { + EVP_RC4_HMAC_MD5 *key = data(ctx); + + switch (type) + { + case EVP_CTRL_AEAD_SET_MAC_KEY: + { + unsigned int i; + unsigned char hmac_key[64]; + + memset (hmac_key,0,sizeof(hmac_key)); + + if (arg > (int)sizeof(hmac_key)) { + MD5_Init(&key->head); + MD5_Update(&key->head,ptr,arg); + MD5_Final(hmac_key,&key->head); + } else { + memcpy(hmac_key,ptr,arg); + } + + for (i=0;ihead); + MD5_Update(&key->head,hmac_key,sizeof(hmac_key)); + + for (i=0;itail); + MD5_Update(&key->tail,hmac_key,sizeof(hmac_key)); + + return 1; + } + case EVP_CTRL_AEAD_TLS1_AAD: + { + unsigned char *p=ptr; + unsigned int len=p[arg-2]<<8|p[arg-1]; + + if (!ctx->encrypt) + { + len -= MD5_DIGEST_LENGTH; + p[arg-2] = len>>8; + p[arg-1] = len; + } + key->payload_length=len; + key->md = key->head; + MD5_Update(&key->md,p,arg); + + return MD5_DIGEST_LENGTH; + } + default: + return -1; + } + } + +static EVP_CIPHER r4_hmac_md5_cipher= + { +#ifdef NID_rc4_hmac_md5 + NID_rc4_hmac_md5, +#else + NID_undef, +#endif + 1,EVP_RC4_KEY_SIZE,0, + EVP_CIPH_STREAM_CIPHER|EVP_CIPH_VARIABLE_LENGTH|EVP_CIPH_FLAG_AEAD_CIPHER, + rc4_hmac_md5_init_key, + rc4_hmac_md5_cipher, + NULL, + sizeof(EVP_RC4_HMAC_MD5), + NULL, + NULL, + rc4_hmac_md5_ctrl, + NULL + }; + +const EVP_CIPHER *EVP_rc4_hmac_md5(void) + { + return(&r4_hmac_md5_cipher); + } +#endif diff --git a/app/openssl/crypto/evp/evp.h b/app/openssl/crypto/evp/evp.h index 9f9795e2..e43a58e6 100644 --- a/app/openssl/crypto/evp/evp.h +++ b/app/openssl/crypto/evp/evp.h @@ -83,7 +83,7 @@ #define EVP_RC5_32_12_16_KEY_SIZE 16 */ #define EVP_MAX_MD_SIZE 64 /* longest known is SHA512 */ -#define EVP_MAX_KEY_LENGTH 32 +#define EVP_MAX_KEY_LENGTH 64 #define EVP_MAX_IV_LENGTH 16 #define EVP_MAX_BLOCK_LENGTH 32 @@ -116,6 +116,7 @@ #define EVP_PKEY_DH NID_dhKeyAgreement #define EVP_PKEY_EC NID_X9_62_id_ecPublicKey #define EVP_PKEY_HMAC NID_hmac +#define EVP_PKEY_CMAC NID_cmac #ifdef __cplusplus extern "C" { @@ -216,6 +217,8 @@ typedef int evp_verify_method(int type,const unsigned char *m, #define EVP_MD_FLAG_DIGALGID_CUSTOM 0x0018 +#define EVP_MD_FLAG_FIPS 0x0400 /* Note if suitable for use in FIPS mode */ + /* Digest ctrls */ #define EVP_MD_CTRL_DIGALGID 0x1 @@ -325,6 +328,10 @@ struct evp_cipher_st #define EVP_CIPH_CBC_MODE 0x2 #define EVP_CIPH_CFB_MODE 0x3 #define EVP_CIPH_OFB_MODE 0x4 +#define EVP_CIPH_CTR_MODE 0x5 +#define EVP_CIPH_GCM_MODE 0x6 +#define EVP_CIPH_CCM_MODE 0x7 +#define EVP_CIPH_XTS_MODE 0x10001 #define EVP_CIPH_MODE 0xF0007 /* Set if variable length cipher */ #define EVP_CIPH_VARIABLE_LENGTH 0x8 @@ -346,6 +353,15 @@ struct evp_cipher_st #define EVP_CIPH_FLAG_DEFAULT_ASN1 0x1000 /* Buffer length in bits not bytes: CFB1 mode only */ #define EVP_CIPH_FLAG_LENGTH_BITS 0x2000 +/* Note if suitable for use in FIPS mode */ +#define EVP_CIPH_FLAG_FIPS 0x4000 +/* Allow non FIPS cipher in FIPS mode */ +#define EVP_CIPH_FLAG_NON_FIPS_ALLOW 0x8000 +/* Cipher handles any and all padding logic as well + * as finalisation. + */ +#define EVP_CIPH_FLAG_CUSTOM_CIPHER 0x100000 +#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 /* ctrl() values */ @@ -358,6 +374,33 @@ struct evp_cipher_st #define EVP_CTRL_RAND_KEY 0x6 #define EVP_CTRL_PBE_PRF_NID 0x7 #define EVP_CTRL_COPY 0x8 +#define EVP_CTRL_GCM_SET_IVLEN 0x9 +#define EVP_CTRL_GCM_GET_TAG 0x10 +#define EVP_CTRL_GCM_SET_TAG 0x11 +#define EVP_CTRL_GCM_SET_IV_FIXED 0x12 +#define EVP_CTRL_GCM_IV_GEN 0x13 +#define EVP_CTRL_CCM_SET_IVLEN EVP_CTRL_GCM_SET_IVLEN +#define EVP_CTRL_CCM_GET_TAG EVP_CTRL_GCM_GET_TAG +#define EVP_CTRL_CCM_SET_TAG EVP_CTRL_GCM_SET_TAG +#define EVP_CTRL_CCM_SET_L 0x14 +#define EVP_CTRL_CCM_SET_MSGLEN 0x15 +/* AEAD cipher deduces payload length and returns number of bytes + * required to store MAC and eventual padding. Subsequent call to + * EVP_Cipher even appends/verifies MAC. + */ +#define EVP_CTRL_AEAD_TLS1_AAD 0x16 +/* Used by composite AEAD ciphers, no-op in GCM, CCM... */ +#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +/* Set the GCM invocation field, decrypt only */ +#define EVP_CTRL_GCM_SET_IV_INV 0x18 + +/* GCM TLS constants */ +/* Length of fixed part of IV derived from PRF */ +#define EVP_GCM_TLS_FIXED_IV_LEN 4 +/* Length of explicit part of IV part of TLS records */ +#define EVP_GCM_TLS_EXPLICIT_IV_LEN 8 +/* Length of tag for TLS */ +#define EVP_GCM_TLS_TAG_LEN 16 typedef struct evp_cipher_info_st { @@ -375,7 +418,7 @@ struct evp_cipher_ctx_st unsigned char oiv[EVP_MAX_IV_LENGTH]; /* original iv */ unsigned char iv[EVP_MAX_IV_LENGTH]; /* working iv */ unsigned char buf[EVP_MAX_BLOCK_LENGTH];/* saved partial block */ - int num; /* used by cfb/ofb mode */ + int num; /* used by cfb/ofb/ctr mode */ void *app_data; /* application stuff */ int key_len; /* May change for variable length cipher */ @@ -695,6 +738,9 @@ const EVP_MD *EVP_dev_crypto_md5(void); #ifndef OPENSSL_NO_RC4 const EVP_CIPHER *EVP_rc4(void); const EVP_CIPHER *EVP_rc4_40(void); +#ifndef OPENSSL_NO_MD5 +const EVP_CIPHER *EVP_rc4_hmac_md5(void); +#endif #endif #ifndef OPENSSL_NO_IDEA const EVP_CIPHER *EVP_idea_ecb(void); @@ -741,9 +787,10 @@ const EVP_CIPHER *EVP_aes_128_cfb8(void); const EVP_CIPHER *EVP_aes_128_cfb128(void); # define EVP_aes_128_cfb EVP_aes_128_cfb128 const EVP_CIPHER *EVP_aes_128_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_128_ctr(void); -#endif +const EVP_CIPHER *EVP_aes_128_ccm(void); +const EVP_CIPHER *EVP_aes_128_gcm(void); +const EVP_CIPHER *EVP_aes_128_xts(void); const EVP_CIPHER *EVP_aes_192_ecb(void); const EVP_CIPHER *EVP_aes_192_cbc(void); const EVP_CIPHER *EVP_aes_192_cfb1(void); @@ -751,9 +798,9 @@ const EVP_CIPHER *EVP_aes_192_cfb8(void); const EVP_CIPHER *EVP_aes_192_cfb128(void); # define EVP_aes_192_cfb EVP_aes_192_cfb128 const EVP_CIPHER *EVP_aes_192_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_192_ctr(void); -#endif +const EVP_CIPHER *EVP_aes_192_ccm(void); +const EVP_CIPHER *EVP_aes_192_gcm(void); const EVP_CIPHER *EVP_aes_256_ecb(void); const EVP_CIPHER *EVP_aes_256_cbc(void); const EVP_CIPHER *EVP_aes_256_cfb1(void); @@ -761,8 +808,13 @@ const EVP_CIPHER *EVP_aes_256_cfb8(void); const EVP_CIPHER *EVP_aes_256_cfb128(void); # define EVP_aes_256_cfb EVP_aes_256_cfb128 const EVP_CIPHER *EVP_aes_256_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_256_ctr(void); +const EVP_CIPHER *EVP_aes_256_ccm(void); +const EVP_CIPHER *EVP_aes_256_gcm(void); +const EVP_CIPHER *EVP_aes_256_xts(void); +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void); +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void); #endif #endif #ifndef OPENSSL_NO_CAMELLIA @@ -869,6 +921,7 @@ struct ec_key_st *EVP_PKEY_get1_EC_KEY(EVP_PKEY *pkey); #endif EVP_PKEY * EVP_PKEY_new(void); +EVP_PKEY * EVP_PKEY_dup(EVP_PKEY *pkey); void EVP_PKEY_free(EVP_PKEY *pkey); EVP_PKEY * d2i_PublicKey(int type,EVP_PKEY **a, const unsigned char **pp, @@ -1047,13 +1100,22 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, #define EVP_PKEY_CTRL_CMS_DECRYPT 10 #define EVP_PKEY_CTRL_CMS_SIGN 11 +#define EVP_PKEY_CTRL_CIPHER 12 + #define EVP_PKEY_ALG_CTRL 0x1000 #define EVP_PKEY_FLAG_AUTOARGLEN 2 +/* Method handles all operations: don't assume any digest related + * defaults. + */ +#define EVP_PKEY_FLAG_SIGCTX_CUSTOM 4 const EVP_PKEY_METHOD *EVP_PKEY_meth_find(int type); EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags); +void EVP_PKEY_meth_get0_info(int *ppkey_id, int *pflags, + const EVP_PKEY_METHOD *meth); +void EVP_PKEY_meth_copy(EVP_PKEY_METHOD *dst, const EVP_PKEY_METHOD *src); void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth); int EVP_PKEY_meth_add0(const EVP_PKEY_METHOD *pmeth); @@ -1071,7 +1133,7 @@ int EVP_PKEY_CTX_get_operation(EVP_PKEY_CTX *ctx); void EVP_PKEY_CTX_set0_keygen_info(EVP_PKEY_CTX *ctx, int *dat, int datlen); EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e, - unsigned char *key, int keylen); + const unsigned char *key, int keylen); void EVP_PKEY_CTX_set_data(EVP_PKEY_CTX *ctx, void *data); void *EVP_PKEY_CTX_get_data(EVP_PKEY_CTX *ctx); @@ -1181,6 +1243,8 @@ void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth, int (*ctrl_str)(EVP_PKEY_CTX *ctx, const char *type, const char *value)); +void EVP_add_alg_module(void); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -1190,8 +1254,14 @@ void ERR_load_EVP_strings(void); /* Error codes for the EVP functions. */ /* Function codes. */ +#define EVP_F_AESNI_INIT_KEY 165 +#define EVP_F_AESNI_XTS_CIPHER 176 #define EVP_F_AES_INIT_KEY 133 +#define EVP_F_AES_XTS 172 +#define EVP_F_AES_XTS_CIPHER 175 +#define EVP_F_ALG_MODULE_INIT 177 #define EVP_F_CAMELLIA_INIT_KEY 159 +#define EVP_F_CMAC_INIT 173 #define EVP_F_D2I_PKEY 100 #define EVP_F_DO_SIGVER_INIT 161 #define EVP_F_DSAPKEY2PKCS8 134 @@ -1246,15 +1316,24 @@ void ERR_load_EVP_strings(void); #define EVP_F_EVP_RIJNDAEL 126 #define EVP_F_EVP_SIGNFINAL 107 #define EVP_F_EVP_VERIFYFINAL 108 +#define EVP_F_FIPS_CIPHERINIT 166 +#define EVP_F_FIPS_CIPHER_CTX_COPY 170 +#define EVP_F_FIPS_CIPHER_CTX_CTRL 167 +#define EVP_F_FIPS_CIPHER_CTX_SET_KEY_LENGTH 171 +#define EVP_F_FIPS_DIGESTINIT 168 +#define EVP_F_FIPS_MD_CTX_COPY 169 +#define EVP_F_HMAC_INIT_EX 174 #define EVP_F_INT_CTX_NEW 157 #define EVP_F_PKCS5_PBE_KEYIVGEN 117 #define EVP_F_PKCS5_V2_PBE_KEYIVGEN 118 +#define EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN 164 #define EVP_F_PKCS8_SET_BROKEN 112 #define EVP_F_PKEY_SET_TYPE 158 #define EVP_F_RC2_MAGIC_TO_METH 109 #define EVP_F_RC5_CTRL 125 /* Reason codes. */ +#define EVP_R_AES_IV_SETUP_FAILED 162 #define EVP_R_AES_KEY_SETUP_FAILED 143 #define EVP_R_ASN1_LIB 140 #define EVP_R_BAD_BLOCK_LENGTH 136 @@ -1272,16 +1351,21 @@ void ERR_load_EVP_strings(void); #define EVP_R_DECODE_ERROR 114 #define EVP_R_DIFFERENT_KEY_TYPES 101 #define EVP_R_DIFFERENT_PARAMETERS 153 +#define EVP_R_DISABLED_FOR_FIPS 163 #define EVP_R_ENCODE_ERROR 115 +#define EVP_R_ERROR_LOADING_SECTION 165 +#define EVP_R_ERROR_SETTING_FIPS_MODE 166 #define EVP_R_EVP_PBE_CIPHERINIT_ERROR 119 #define EVP_R_EXPECTING_AN_RSA_KEY 127 #define EVP_R_EXPECTING_A_DH_KEY 128 #define EVP_R_EXPECTING_A_DSA_KEY 129 #define EVP_R_EXPECTING_A_ECDSA_KEY 141 #define EVP_R_EXPECTING_A_EC_KEY 142 +#define EVP_R_FIPS_MODE_NOT_SUPPORTED 167 #define EVP_R_INITIALIZATION_ERROR 134 #define EVP_R_INPUT_NOT_INITIALIZED 111 #define EVP_R_INVALID_DIGEST 152 +#define EVP_R_INVALID_FIPS_MODE 168 #define EVP_R_INVALID_KEY_LENGTH 130 #define EVP_R_INVALID_OPERATION 148 #define EVP_R_IV_TOO_LARGE 102 @@ -1303,8 +1387,10 @@ void ERR_load_EVP_strings(void); #define EVP_R_PRIVATE_KEY_DECODE_ERROR 145 #define EVP_R_PRIVATE_KEY_ENCODE_ERROR 146 #define EVP_R_PUBLIC_KEY_NOT_RSA 106 +#define EVP_R_TOO_LARGE 164 #define EVP_R_UNKNOWN_CIPHER 160 #define EVP_R_UNKNOWN_DIGEST 161 +#define EVP_R_UNKNOWN_OPTION 169 #define EVP_R_UNKNOWN_PBE_ALGORITHM 121 #define EVP_R_UNSUPORTED_NUMBER_OF_ROUNDS 135 #define EVP_R_UNSUPPORTED_ALGORITHM 156 diff --git a/app/openssl/crypto/evp/evp_cnf.c b/app/openssl/crypto/evp/evp_cnf.c new file mode 100644 index 00000000..2e4db302 --- /dev/null +++ b/app/openssl/crypto/evp/evp_cnf.c @@ -0,0 +1,125 @@ +/* evp_cnf.c */ +/* Written by Stephen Henson (steve@openssl.org) for the OpenSSL + * project 2007. + */ +/* ==================================================================== + * Copyright (c) 2007 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include +#include +#include +#include "cryptlib.h" +#include +#include +#include +#include +#ifdef OPENSSL_FIPS +#include +#endif + + +/* Algorithm configuration module. */ + +static int alg_module_init(CONF_IMODULE *md, const CONF *cnf) + { + int i; + const char *oid_section; + STACK_OF(CONF_VALUE) *sktmp; + CONF_VALUE *oval; + oid_section = CONF_imodule_get_value(md); + if(!(sktmp = NCONF_get_section(cnf, oid_section))) + { + EVPerr(EVP_F_ALG_MODULE_INIT, EVP_R_ERROR_LOADING_SECTION); + return 0; + } + for(i = 0; i < sk_CONF_VALUE_num(sktmp); i++) + { + oval = sk_CONF_VALUE_value(sktmp, i); + if (!strcmp(oval->name, "fips_mode")) + { + int m; + if (!X509V3_get_value_bool(oval, &m)) + { + EVPerr(EVP_F_ALG_MODULE_INIT, EVP_R_INVALID_FIPS_MODE); + return 0; + } + if (m > 0) + { +#ifdef OPENSSL_FIPS + if (!FIPS_mode() && !FIPS_mode_set(1)) + { + EVPerr(EVP_F_ALG_MODULE_INIT, EVP_R_ERROR_SETTING_FIPS_MODE); + return 0; + } +#else + EVPerr(EVP_F_ALG_MODULE_INIT, EVP_R_FIPS_MODE_NOT_SUPPORTED); + return 0; +#endif + } + } + else + { + EVPerr(EVP_F_ALG_MODULE_INIT, EVP_R_UNKNOWN_OPTION); + ERR_add_error_data(4, "name=", oval->name, + ", value=", oval->value); + } + + } + return 1; + } + +void EVP_add_alg_module(void) + { + CONF_module_add("alg_section", alg_module_init, 0); + } diff --git a/app/openssl/crypto/evp/evp_enc.c b/app/openssl/crypto/evp/evp_enc.c index c268d25c..0c54f05e 100644 --- a/app/openssl/crypto/evp/evp_enc.c +++ b/app/openssl/crypto/evp/evp_enc.c @@ -64,8 +64,18 @@ #ifndef OPENSSL_NO_ENGINE #include #endif +#ifdef OPENSSL_FIPS +#include +#endif #include "evp_locl.h" +#ifdef OPENSSL_FIPS +#define M_do_cipher(ctx, out, in, inl) FIPS_cipher(ctx, out, in, inl) +#else +#define M_do_cipher(ctx, out, in, inl) ctx->cipher->do_cipher(ctx, out, in, inl) +#endif + + const char EVP_version[]="EVP" OPENSSL_VERSION_PTEXT; void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *ctx) @@ -115,10 +125,14 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *imp /* Ensure a context left lying around from last time is cleared * (the previous check attempted to avoid this if the same * ENGINE and EVP_CIPHER could be used). */ - EVP_CIPHER_CTX_cleanup(ctx); - - /* Restore encrypt field: it is zeroed by cleanup */ - ctx->encrypt = enc; + if (ctx->cipher) + { + unsigned long flags = ctx->flags; + EVP_CIPHER_CTX_cleanup(ctx); + /* Restore encrypt and flags */ + ctx->encrypt = enc; + ctx->flags = flags; + } #ifndef OPENSSL_NO_ENGINE if(impl) { @@ -155,6 +169,10 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *imp ctx->engine = NULL; #endif +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_cipherinit(ctx, cipher, key, iv, enc); +#endif ctx->cipher=cipher; if (ctx->cipher->ctx_size) { @@ -187,6 +205,10 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *imp } #ifndef OPENSSL_NO_ENGINE skip_to_init: +#endif +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_cipherinit(ctx, cipher, key, iv, enc); #endif /* we assume block size is a power of 2 in *cryptUpdate */ OPENSSL_assert(ctx->cipher->block_size == 1 @@ -214,6 +236,13 @@ skip_to_init: memcpy(ctx->iv, ctx->oiv, EVP_CIPHER_CTX_iv_length(ctx)); break; + case EVP_CIPH_CTR_MODE: + ctx->num = 0; + /* Don't reuse IV for CTR mode */ + if(iv) + memcpy(ctx->iv, iv, EVP_CIPHER_CTX_iv_length(ctx)); + break; + default: return 0; break; @@ -280,6 +309,16 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, { int i,j,bl; + if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) + { + i = M_do_cipher(ctx, out, in, inl); + if (i < 0) + return 0; + else + *outl = i; + return 1; + } + if (inl <= 0) { *outl = 0; @@ -288,7 +327,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, if(ctx->buf_len == 0 && (inl&(ctx->block_mask)) == 0) { - if(ctx->cipher->do_cipher(ctx,out,in,inl)) + if(M_do_cipher(ctx,out,in,inl)) { *outl=inl; return 1; @@ -315,7 +354,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, { j=bl-i; memcpy(&(ctx->buf[i]),in,j); - if(!ctx->cipher->do_cipher(ctx,out,ctx->buf,bl)) return 0; + if(!M_do_cipher(ctx,out,ctx->buf,bl)) return 0; inl-=j; in+=j; out+=bl; @@ -328,7 +367,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, inl-=i; if (inl > 0) { - if(!ctx->cipher->do_cipher(ctx,out,in,inl)) return 0; + if(!M_do_cipher(ctx,out,in,inl)) return 0; *outl+=inl; } @@ -350,6 +389,16 @@ int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int n,ret; unsigned int i, b, bl; + if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) + { + ret = M_do_cipher(ctx, out, NULL, 0); + if (ret < 0) + return 0; + else + *outl = ret; + return 1; + } + b=ctx->cipher->block_size; OPENSSL_assert(b <= sizeof ctx->buf); if (b == 1) @@ -372,7 +421,7 @@ int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) n=b-bl; for (i=bl; ibuf[i]=n; - ret=ctx->cipher->do_cipher(ctx,out,ctx->buf,b); + ret=M_do_cipher(ctx,out,ctx->buf,b); if(ret) @@ -387,6 +436,19 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, int fix_len; unsigned int b; + if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) + { + fix_len = M_do_cipher(ctx, out, in, inl); + if (fix_len < 0) + { + *outl = 0; + return 0; + } + else + *outl = fix_len; + return 1; + } + if (inl <= 0) { *outl = 0; @@ -440,8 +502,18 @@ int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) { int i,n; unsigned int b; - *outl=0; + + if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) + { + i = M_do_cipher(ctx, out, NULL, 0); + if (i < 0) + return 0; + else + *outl = i; + return 1; + } + b=ctx->cipher->block_size; if (ctx->flags & EVP_CIPH_NO_PADDING) { @@ -496,6 +568,7 @@ void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *ctx) int EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *c) { +#ifndef OPENSSL_FIPS if (c->cipher != NULL) { if(c->cipher->cleanup && !c->cipher->cleanup(c)) @@ -506,11 +579,15 @@ int EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *c) } if (c->cipher_data) OPENSSL_free(c->cipher_data); +#endif #ifndef OPENSSL_NO_ENGINE if (c->engine) /* The EVP_CIPHER we used belongs to an ENGINE, release the * functional reference we held for this reason. */ ENGINE_finish(c->engine); +#endif +#ifdef OPENSSL_FIPS + FIPS_cipher_ctx_cleanup(c); #endif memset(c,0,sizeof(EVP_CIPHER_CTX)); return 1; diff --git a/app/openssl/crypto/evp/evp_err.c b/app/openssl/crypto/evp/evp_err.c index d8bfec09..08eab988 100644 --- a/app/openssl/crypto/evp/evp_err.c +++ b/app/openssl/crypto/evp/evp_err.c @@ -1,6 +1,6 @@ /* crypto/evp/evp_err.c */ /* ==================================================================== - * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,8 +70,14 @@ static ERR_STRING_DATA EVP_str_functs[]= { +{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, +{ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, +{ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, +{ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, +{ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, +{ERR_FUNC(EVP_F_CMAC_INIT), "CMAC_INIT"}, {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, {ERR_FUNC(EVP_F_DO_SIGVER_INIT), "DO_SIGVER_INIT"}, {ERR_FUNC(EVP_F_DSAPKEY2PKCS8), "DSAPKEY2PKCS8"}, @@ -86,7 +92,7 @@ static ERR_STRING_DATA EVP_str_functs[]= {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"}, {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"}, {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"}, -{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"}, +{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"}, {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"}, {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"}, {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"}, @@ -126,9 +132,17 @@ static ERR_STRING_DATA EVP_str_functs[]= {ERR_FUNC(EVP_F_EVP_RIJNDAEL), "EVP_RIJNDAEL"}, {ERR_FUNC(EVP_F_EVP_SIGNFINAL), "EVP_SignFinal"}, {ERR_FUNC(EVP_F_EVP_VERIFYFINAL), "EVP_VerifyFinal"}, +{ERR_FUNC(EVP_F_FIPS_CIPHERINIT), "FIPS_CIPHERINIT"}, +{ERR_FUNC(EVP_F_FIPS_CIPHER_CTX_COPY), "FIPS_CIPHER_CTX_COPY"}, +{ERR_FUNC(EVP_F_FIPS_CIPHER_CTX_CTRL), "FIPS_CIPHER_CTX_CTRL"}, +{ERR_FUNC(EVP_F_FIPS_CIPHER_CTX_SET_KEY_LENGTH), "FIPS_CIPHER_CTX_SET_KEY_LENGTH"}, +{ERR_FUNC(EVP_F_FIPS_DIGESTINIT), "FIPS_DIGESTINIT"}, +{ERR_FUNC(EVP_F_FIPS_MD_CTX_COPY), "FIPS_MD_CTX_COPY"}, +{ERR_FUNC(EVP_F_HMAC_INIT_EX), "HMAC_Init_ex"}, {ERR_FUNC(EVP_F_INT_CTX_NEW), "INT_CTX_NEW"}, {ERR_FUNC(EVP_F_PKCS5_PBE_KEYIVGEN), "PKCS5_PBE_keyivgen"}, {ERR_FUNC(EVP_F_PKCS5_V2_PBE_KEYIVGEN), "PKCS5_v2_PBE_keyivgen"}, +{ERR_FUNC(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN), "PKCS5_V2_PBKDF2_KEYIVGEN"}, {ERR_FUNC(EVP_F_PKCS8_SET_BROKEN), "PKCS8_set_broken"}, {ERR_FUNC(EVP_F_PKEY_SET_TYPE), "PKEY_SET_TYPE"}, {ERR_FUNC(EVP_F_RC2_MAGIC_TO_METH), "RC2_MAGIC_TO_METH"}, @@ -138,6 +152,7 @@ static ERR_STRING_DATA EVP_str_functs[]= static ERR_STRING_DATA EVP_str_reasons[]= { +{ERR_REASON(EVP_R_AES_IV_SETUP_FAILED) ,"aes iv setup failed"}, {ERR_REASON(EVP_R_AES_KEY_SETUP_FAILED) ,"aes key setup failed"}, {ERR_REASON(EVP_R_ASN1_LIB) ,"asn1 lib"}, {ERR_REASON(EVP_R_BAD_BLOCK_LENGTH) ,"bad block length"}, @@ -155,16 +170,21 @@ static ERR_STRING_DATA EVP_str_reasons[]= {ERR_REASON(EVP_R_DECODE_ERROR) ,"decode error"}, {ERR_REASON(EVP_R_DIFFERENT_KEY_TYPES) ,"different key types"}, {ERR_REASON(EVP_R_DIFFERENT_PARAMETERS) ,"different parameters"}, +{ERR_REASON(EVP_R_DISABLED_FOR_FIPS) ,"disabled for fips"}, {ERR_REASON(EVP_R_ENCODE_ERROR) ,"encode error"}, +{ERR_REASON(EVP_R_ERROR_LOADING_SECTION) ,"error loading section"}, +{ERR_REASON(EVP_R_ERROR_SETTING_FIPS_MODE),"error setting fips mode"}, {ERR_REASON(EVP_R_EVP_PBE_CIPHERINIT_ERROR),"evp pbe cipherinit error"}, {ERR_REASON(EVP_R_EXPECTING_AN_RSA_KEY) ,"expecting an rsa key"}, {ERR_REASON(EVP_R_EXPECTING_A_DH_KEY) ,"expecting a dh key"}, {ERR_REASON(EVP_R_EXPECTING_A_DSA_KEY) ,"expecting a dsa key"}, {ERR_REASON(EVP_R_EXPECTING_A_ECDSA_KEY) ,"expecting a ecdsa key"}, {ERR_REASON(EVP_R_EXPECTING_A_EC_KEY) ,"expecting a ec key"}, +{ERR_REASON(EVP_R_FIPS_MODE_NOT_SUPPORTED),"fips mode not supported"}, {ERR_REASON(EVP_R_INITIALIZATION_ERROR) ,"initialization error"}, {ERR_REASON(EVP_R_INPUT_NOT_INITIALIZED) ,"input not initialized"}, {ERR_REASON(EVP_R_INVALID_DIGEST) ,"invalid digest"}, +{ERR_REASON(EVP_R_INVALID_FIPS_MODE) ,"invalid fips mode"}, {ERR_REASON(EVP_R_INVALID_KEY_LENGTH) ,"invalid key length"}, {ERR_REASON(EVP_R_INVALID_OPERATION) ,"invalid operation"}, {ERR_REASON(EVP_R_IV_TOO_LARGE) ,"iv too large"}, @@ -186,8 +206,10 @@ static ERR_STRING_DATA EVP_str_reasons[]= {ERR_REASON(EVP_R_PRIVATE_KEY_DECODE_ERROR),"private key decode error"}, {ERR_REASON(EVP_R_PRIVATE_KEY_ENCODE_ERROR),"private key encode error"}, {ERR_REASON(EVP_R_PUBLIC_KEY_NOT_RSA) ,"public key not rsa"}, +{ERR_REASON(EVP_R_TOO_LARGE) ,"too large"}, {ERR_REASON(EVP_R_UNKNOWN_CIPHER) ,"unknown cipher"}, {ERR_REASON(EVP_R_UNKNOWN_DIGEST) ,"unknown digest"}, +{ERR_REASON(EVP_R_UNKNOWN_OPTION) ,"unknown option"}, {ERR_REASON(EVP_R_UNKNOWN_PBE_ALGORITHM) ,"unknown pbe algorithm"}, {ERR_REASON(EVP_R_UNSUPORTED_NUMBER_OF_ROUNDS),"unsuported number of rounds"}, {ERR_REASON(EVP_R_UNSUPPORTED_ALGORITHM) ,"unsupported algorithm"}, diff --git a/app/openssl/crypto/evp/evp_key.c b/app/openssl/crypto/evp/evp_key.c index 839d6a3a..7961fbeb 100644 --- a/app/openssl/crypto/evp/evp_key.c +++ b/app/openssl/crypto/evp/evp_key.c @@ -120,7 +120,7 @@ int EVP_BytesToKey(const EVP_CIPHER *type, const EVP_MD *md, unsigned char md_buf[EVP_MAX_MD_SIZE]; int niv,nkey,addmd=0; unsigned int mds=0,i; - + int rv = 0; nkey=type->key_len; niv=type->iv_len; OPENSSL_assert(nkey <= EVP_MAX_KEY_LENGTH); @@ -134,17 +134,24 @@ int EVP_BytesToKey(const EVP_CIPHER *type, const EVP_MD *md, if (!EVP_DigestInit_ex(&c,md, NULL)) return 0; if (addmd++) - EVP_DigestUpdate(&c,&(md_buf[0]),mds); - EVP_DigestUpdate(&c,data,datal); + if (!EVP_DigestUpdate(&c,&(md_buf[0]),mds)) + goto err; + if (!EVP_DigestUpdate(&c,data,datal)) + goto err; if (salt != NULL) - EVP_DigestUpdate(&c,salt,PKCS5_SALT_LEN); - EVP_DigestFinal_ex(&c,&(md_buf[0]),&mds); + if (!EVP_DigestUpdate(&c,salt,PKCS5_SALT_LEN)) + goto err; + if (!EVP_DigestFinal_ex(&c,&(md_buf[0]),&mds)) + goto err; for (i=1; i<(unsigned int)count; i++) { - EVP_DigestInit_ex(&c,md, NULL); - EVP_DigestUpdate(&c,&(md_buf[0]),mds); - EVP_DigestFinal_ex(&c,&(md_buf[0]),&mds); + if (!EVP_DigestInit_ex(&c,md, NULL)) + goto err; + if (!EVP_DigestUpdate(&c,&(md_buf[0]),mds)) + goto err; + if (!EVP_DigestFinal_ex(&c,&(md_buf[0]),&mds)) + goto err; } i=0; if (nkey) @@ -173,8 +180,10 @@ int EVP_BytesToKey(const EVP_CIPHER *type, const EVP_MD *md, } if ((nkey == 0) && (niv == 0)) break; } + rv = type->key_len; + err: EVP_MD_CTX_cleanup(&c); OPENSSL_cleanse(&(md_buf[0]),EVP_MAX_MD_SIZE); - return(type->key_len); + return rv; } diff --git a/app/openssl/crypto/evp/evp_lib.c b/app/openssl/crypto/evp/evp_lib.c index 40951a04..b180e482 100644 --- a/app/openssl/crypto/evp/evp_lib.c +++ b/app/openssl/crypto/evp/evp_lib.c @@ -67,6 +67,8 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type) if (c->cipher->set_asn1_parameters != NULL) ret=c->cipher->set_asn1_parameters(c,type); + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) + ret=EVP_CIPHER_set_asn1_iv(c, type); else ret=-1; return(ret); @@ -78,6 +80,8 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type) if (c->cipher->get_asn1_parameters != NULL) ret=c->cipher->get_asn1_parameters(c,type); + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) + ret=EVP_CIPHER_get_asn1_iv(c, type); else ret=-1; return(ret); diff --git a/app/openssl/crypto/evp/evp_locl.h b/app/openssl/crypto/evp/evp_locl.h index 292d74c1..08c0a66d 100644 --- a/app/openssl/crypto/evp/evp_locl.h +++ b/app/openssl/crypto/evp/evp_locl.h @@ -343,3 +343,43 @@ struct evp_pkey_method_st } /* EVP_PKEY_METHOD */; void evp_pkey_set_cb_translate(BN_GENCB *cb, EVP_PKEY_CTX *ctx); + +int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, + const EVP_CIPHER *c, const EVP_MD *md, int en_de); + +#ifdef OPENSSL_FIPS + +#ifdef OPENSSL_DOING_MAKEDEPEND +#undef SHA1_Init +#undef SHA1_Update +#undef SHA224_Init +#undef SHA256_Init +#undef SHA384_Init +#undef SHA512_Init +#undef DES_set_key_unchecked +#endif + +#define RIPEMD160_Init private_RIPEMD160_Init +#define WHIRLPOOL_Init private_WHIRLPOOL_Init +#define MD5_Init private_MD5_Init +#define MD4_Init private_MD4_Init +#define MD2_Init private_MD2_Init +#define MDC2_Init private_MDC2_Init +#define SHA_Init private_SHA_Init +#define SHA1_Init private_SHA1_Init +#define SHA224_Init private_SHA224_Init +#define SHA256_Init private_SHA256_Init +#define SHA384_Init private_SHA384_Init +#define SHA512_Init private_SHA512_Init + +#define BF_set_key private_BF_set_key +#define CAST_set_key private_CAST_set_key +#define idea_set_encrypt_key private_idea_set_encrypt_key +#define SEED_set_key private_SEED_set_key +#define RC2_set_key private_RC2_set_key +#define RC4_set_key private_RC4_set_key +#define DES_set_key_unchecked private_DES_set_key_unchecked +#define Camellia_set_key private_Camellia_set_key + +#endif diff --git a/app/openssl/crypto/evp/evp_pbe.c b/app/openssl/crypto/evp/evp_pbe.c index c9d932d2..f8c32d82 100644 --- a/app/openssl/crypto/evp/evp_pbe.c +++ b/app/openssl/crypto/evp/evp_pbe.c @@ -61,6 +61,7 @@ #include #include #include +#include "evp_locl.h" /* Password based encryption (PBE) functions */ @@ -87,6 +88,10 @@ static const EVP_PBE_CTL builtin_pbe[] = {EVP_PBE_TYPE_OUTER, NID_pbeWithSHA1AndRC2_CBC, NID_rc2_64_cbc, NID_sha1, PKCS5_PBE_keyivgen}, +#ifndef OPENSSL_NO_HMAC + {EVP_PBE_TYPE_OUTER, NID_id_pbkdf2, -1, -1, PKCS5_v2_PBKDF2_keyivgen}, +#endif + {EVP_PBE_TYPE_OUTER, NID_pbe_WithSHA1And128BitRC4, NID_rc4, NID_sha1, PKCS12_PBE_keyivgen}, {EVP_PBE_TYPE_OUTER, NID_pbe_WithSHA1And40BitRC4, diff --git a/app/openssl/crypto/evp/evptests.txt b/app/openssl/crypto/evp/evptests.txt index beb12144..c273707c 100644 --- a/app/openssl/crypto/evp/evptests.txt +++ b/app/openssl/crypto/evp/evptests.txt @@ -158,6 +158,19 @@ AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:B7B AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E1C656305ED1A7A6563805746FE03EDC:30C81C46A35CE411E5FBC1191A0A52EF:71AB47A086E86EEDF39D1C5BBA97C408:0 AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:41635BE625B48AFC1666DD42A09D96E7:F69F2445DF4F9B17AD2B417BE66C3710:0126141D67F37BE8538F5A8BE740E484:0 +# AES Counter test vectors from RFC3686 +aes-128-ctr:AE6852F8121067CC4BF7A5765577F39E:00000030000000000000000000000001:53696E676C6520626C6F636B206D7367:E4095D4FB7A7B3792D6175A3261311B8:1 +aes-128-ctr:7E24067817FAE0D743D6CE1F32539163:006CB6DBC0543B59DA48D90B00000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:5104A106168A72D9790D41EE8EDAD388EB2E1EFC46DA57C8FCE630DF9141BE28:1 +aes-128-ctr:7691BE035E5020A8AC6E618529F9A0DC:00E0017B27777F3F4A1786F000000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:C1CF48A89F2FFDD9CF4652E9EFDB72D74540A42BDE6D7836D59A5CEAAEF3105325B2072F:1 + +aes-192-ctr:16AF5B145FC9F579C175F93E3BFB0EED863D06CCFDB78515:0000004836733C147D6D93CB00000001:53696E676C6520626C6F636B206D7367:4B55384FE259C9C84E7935A003CBE928:1 +aes-192-ctr:7C5CB2401B3DC33C19E7340819E0F69C678C3DB8E6F6A91A:0096B03B020C6EADC2CB500D00000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:453243FC609B23327EDFAAFA7131CD9F8490701C5AD4A79CFC1FE0FF42F4FB00:1 +aes-192-ctr:02BF391EE8ECB159B959617B0965279BF59B60A786D3E0FE:0007BDFD5CBD60278DCC091200000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:96893FC55E5C722F540B7DD1DDF7E758D288BC95C69165884536C811662F2188ABEE0935:1 + +aes-256-ctr:776BEFF2851DB06F4C8A0542C8696F6C6A81AF1EEC96B4D37FC1D689E6C1C104:00000060DB5672C97AA8F0B200000001:53696E676C6520626C6F636B206D7367:145AD01DBF824EC7560863DC71E3E0C0:1 +aes-256-ctr:F6D66D6BD52D59BB0796365879EFF886C66DD51A5B6A99744B50590C87A23884:00FAAC24C1585EF15A43D87500000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:F05E231B3894612C49EE000B804EB2A9B8306B508F839D6A5530831D9344AF1C:1 +aes-256-ctr:FF7A617CE69148E4F1726E2F43581DE2AA62D9F805532EDFF1EED687FB54153D:001CC5B751A51D70A1C1114800000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:EB6C52821D0BBBF7CE7594462ACA4FAAB407DF866569FD07F48CC0B583D6071F1EC0E6B8:1 + # DES ECB tests (from destest) DES-ECB:0000000000000000::0000000000000000:8CA64DE9C1B123A7 diff --git a/app/openssl/crypto/evp/m_dss.c b/app/openssl/crypto/evp/m_dss.c index 48c26895..6fb7e9a8 100644 --- a/app/openssl/crypto/evp/m_dss.c +++ b/app/openssl/crypto/evp/m_dss.c @@ -60,12 +60,13 @@ #include "cryptlib.h" #include #include -#include +#include #ifndef OPENSSL_NO_DSA #include #endif #ifndef OPENSSL_NO_SHA +#ifndef OPENSSL_FIPS static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); } @@ -97,3 +98,4 @@ const EVP_MD *EVP_dss(void) return(&dsa_md); } #endif +#endif diff --git a/app/openssl/crypto/evp/m_dss1.c b/app/openssl/crypto/evp/m_dss1.c index 4f03fb70..2df362a6 100644 --- a/app/openssl/crypto/evp/m_dss1.c +++ b/app/openssl/crypto/evp/m_dss1.c @@ -63,11 +63,13 @@ #include #include -#include +#include #ifndef OPENSSL_NO_DSA #include #endif +#ifndef OPENSSL_FIPS + static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); } @@ -98,3 +100,4 @@ const EVP_MD *EVP_dss1(void) return(&dss1_md); } #endif +#endif diff --git a/app/openssl/crypto/evp/m_ecdsa.c b/app/openssl/crypto/evp/m_ecdsa.c index 8d87a49e..4b15fb0f 100644 --- a/app/openssl/crypto/evp/m_ecdsa.c +++ b/app/openssl/crypto/evp/m_ecdsa.c @@ -116,6 +116,8 @@ #include #ifndef OPENSSL_NO_SHA +#ifndef OPENSSL_FIPS + static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); } @@ -146,3 +148,4 @@ const EVP_MD *EVP_ecdsa(void) return(&ecdsa_md); } #endif +#endif diff --git a/app/openssl/crypto/evp/m_md4.c b/app/openssl/crypto/evp/m_md4.c index 1e0b7c5b..6d47f61b 100644 --- a/app/openssl/crypto/evp/m_md4.c +++ b/app/openssl/crypto/evp/m_md4.c @@ -69,6 +69,8 @@ #include #endif +#include "evp_locl.h" + static int init(EVP_MD_CTX *ctx) { return MD4_Init(ctx->md_data); } diff --git a/app/openssl/crypto/evp/m_md5.c b/app/openssl/crypto/evp/m_md5.c index 63c14211..9a8bae02 100644 --- a/app/openssl/crypto/evp/m_md5.c +++ b/app/openssl/crypto/evp/m_md5.c @@ -68,6 +68,7 @@ #ifndef OPENSSL_NO_RSA #include #endif +#include "evp_locl.h" static int init(EVP_MD_CTX *ctx) { return MD5_Init(ctx->md_data); } diff --git a/app/openssl/crypto/evp/m_mdc2.c b/app/openssl/crypto/evp/m_mdc2.c index b08d5598..3602bed3 100644 --- a/app/openssl/crypto/evp/m_mdc2.c +++ b/app/openssl/crypto/evp/m_mdc2.c @@ -69,6 +69,8 @@ #include #endif +#include "evp_locl.h" + static int init(EVP_MD_CTX *ctx) { return MDC2_Init(ctx->md_data); } diff --git a/app/openssl/crypto/evp/m_ripemd.c b/app/openssl/crypto/evp/m_ripemd.c index a1d60ee7..7bf4804c 100644 --- a/app/openssl/crypto/evp/m_ripemd.c +++ b/app/openssl/crypto/evp/m_ripemd.c @@ -68,6 +68,7 @@ #ifndef OPENSSL_NO_RSA #include #endif +#include "evp_locl.h" static int init(EVP_MD_CTX *ctx) { return RIPEMD160_Init(ctx->md_data); } diff --git a/app/openssl/crypto/evp/m_sha1.c b/app/openssl/crypto/evp/m_sha1.c index 9a2790fd..bd0c01ad 100644 --- a/app/openssl/crypto/evp/m_sha1.c +++ b/app/openssl/crypto/evp/m_sha1.c @@ -59,15 +59,18 @@ #include #include "cryptlib.h" +#ifndef OPENSSL_FIPS + #ifndef OPENSSL_NO_SHA #include #include -#include +#include #ifndef OPENSSL_NO_RSA #include #endif + static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); } @@ -202,3 +205,5 @@ static const EVP_MD sha512_md= const EVP_MD *EVP_sha512(void) { return(&sha512_md); } #endif /* ifndef OPENSSL_NO_SHA512 */ + +#endif diff --git a/app/openssl/crypto/evp/m_wp.c b/app/openssl/crypto/evp/m_wp.c index 1ce47c04..c51bc2d5 100644 --- a/app/openssl/crypto/evp/m_wp.c +++ b/app/openssl/crypto/evp/m_wp.c @@ -9,6 +9,7 @@ #include #include #include +#include "evp_locl.h" static int init(EVP_MD_CTX *ctx) { return WHIRLPOOL_Init(ctx->md_data); } diff --git a/app/openssl/crypto/evp/names.c b/app/openssl/crypto/evp/names.c index f2869f5c..6311ad7c 100644 --- a/app/openssl/crypto/evp/names.c +++ b/app/openssl/crypto/evp/names.c @@ -66,6 +66,10 @@ int EVP_add_cipher(const EVP_CIPHER *c) { int r; + if (c == NULL) return 0; + + OPENSSL_init(); + r=OBJ_NAME_add(OBJ_nid2sn(c->nid),OBJ_NAME_TYPE_CIPHER_METH,(const char *)c); if (r == 0) return(0); check_defer(c->nid); @@ -78,6 +82,7 @@ int EVP_add_digest(const EVP_MD *md) { int r; const char *name; + OPENSSL_init(); name=OBJ_nid2sn(md->type); r=OBJ_NAME_add(name,OBJ_NAME_TYPE_MD_METH,(const char *)md); diff --git a/app/openssl/crypto/evp/p5_crpt.c b/app/openssl/crypto/evp/p5_crpt.c index 7ecfa8da..294cc90d 100644 --- a/app/openssl/crypto/evp/p5_crpt.c +++ b/app/openssl/crypto/evp/p5_crpt.c @@ -82,6 +82,8 @@ int PKCS5_PBE_keyivgen(EVP_CIPHER_CTX *cctx, const char *pass, int passlen, unsigned char *salt; const unsigned char *pbuf; int mdsize; + int rv = 0; + EVP_MD_CTX_init(&ctx); /* Extract useful info from parameter */ if (param == NULL || param->type != V_ASN1_SEQUENCE || @@ -104,29 +106,38 @@ int PKCS5_PBE_keyivgen(EVP_CIPHER_CTX *cctx, const char *pass, int passlen, if(!pass) passlen = 0; else if(passlen == -1) passlen = strlen(pass); - EVP_MD_CTX_init(&ctx); - EVP_DigestInit_ex(&ctx, md, NULL); - EVP_DigestUpdate(&ctx, pass, passlen); - EVP_DigestUpdate(&ctx, salt, saltlen); + if (!EVP_DigestInit_ex(&ctx, md, NULL)) + goto err; + if (!EVP_DigestUpdate(&ctx, pass, passlen)) + goto err; + if (!EVP_DigestUpdate(&ctx, salt, saltlen)) + goto err; PBEPARAM_free(pbe); - EVP_DigestFinal_ex(&ctx, md_tmp, NULL); + if (!EVP_DigestFinal_ex(&ctx, md_tmp, NULL)) + goto err; mdsize = EVP_MD_size(md); if (mdsize < 0) return 0; for (i = 1; i < iter; i++) { - EVP_DigestInit_ex(&ctx, md, NULL); - EVP_DigestUpdate(&ctx, md_tmp, mdsize); - EVP_DigestFinal_ex (&ctx, md_tmp, NULL); + if (!EVP_DigestInit_ex(&ctx, md, NULL)) + goto err; + if (!EVP_DigestUpdate(&ctx, md_tmp, mdsize)) + goto err; + if (!EVP_DigestFinal_ex (&ctx, md_tmp, NULL)) + goto err; } - EVP_MD_CTX_cleanup(&ctx); OPENSSL_assert(EVP_CIPHER_key_length(cipher) <= (int)sizeof(md_tmp)); memcpy(key, md_tmp, EVP_CIPHER_key_length(cipher)); OPENSSL_assert(EVP_CIPHER_iv_length(cipher) <= 16); memcpy(iv, md_tmp + (16 - EVP_CIPHER_iv_length(cipher)), EVP_CIPHER_iv_length(cipher)); - EVP_CipherInit_ex(cctx, cipher, NULL, key, iv, en_de); + if (!EVP_CipherInit_ex(cctx, cipher, NULL, key, iv, en_de)) + goto err; OPENSSL_cleanse(md_tmp, EVP_MAX_MD_SIZE); OPENSSL_cleanse(key, EVP_MAX_KEY_LENGTH); OPENSSL_cleanse(iv, EVP_MAX_IV_LENGTH); - return 1; + rv = 1; + err: + EVP_MD_CTX_cleanup(&ctx); + return rv; } diff --git a/app/openssl/crypto/evp/p5_crpt2.c b/app/openssl/crypto/evp/p5_crpt2.c index 334379f3..fe3c6c88 100644 --- a/app/openssl/crypto/evp/p5_crpt2.c +++ b/app/openssl/crypto/evp/p5_crpt2.c @@ -62,6 +62,7 @@ #include #include #include +#include "evp_locl.h" /* set this to print out info about the keygen algorithm */ /* #define DEBUG_PKCS5V2 */ @@ -84,19 +85,24 @@ int PKCS5_PBKDF2_HMAC(const char *pass, int passlen, unsigned char digtmp[EVP_MAX_MD_SIZE], *p, itmp[4]; int cplen, j, k, tkeylen, mdlen; unsigned long i = 1; - HMAC_CTX hctx; + HMAC_CTX hctx_tpl, hctx; mdlen = EVP_MD_size(digest); if (mdlen < 0) return 0; - HMAC_CTX_init(&hctx); + HMAC_CTX_init(&hctx_tpl); p = out; tkeylen = keylen; if(!pass) passlen = 0; else if(passlen == -1) passlen = strlen(pass); + if (!HMAC_Init_ex(&hctx_tpl, pass, passlen, digest, NULL)) + { + HMAC_CTX_cleanup(&hctx_tpl); + return 0; + } while(tkeylen) { if(tkeylen > mdlen) @@ -110,15 +116,36 @@ int PKCS5_PBKDF2_HMAC(const char *pass, int passlen, itmp[1] = (unsigned char)((i >> 16) & 0xff); itmp[2] = (unsigned char)((i >> 8) & 0xff); itmp[3] = (unsigned char)(i & 0xff); - HMAC_Init_ex(&hctx, pass, passlen, digest, NULL); - HMAC_Update(&hctx, salt, saltlen); - HMAC_Update(&hctx, itmp, 4); - HMAC_Final(&hctx, digtmp, NULL); + if (!HMAC_CTX_copy(&hctx, &hctx_tpl)) + { + HMAC_CTX_cleanup(&hctx_tpl); + return 0; + } + if (!HMAC_Update(&hctx, salt, saltlen) + || !HMAC_Update(&hctx, itmp, 4) + || !HMAC_Final(&hctx, digtmp, NULL)) + { + HMAC_CTX_cleanup(&hctx_tpl); + HMAC_CTX_cleanup(&hctx); + return 0; + } + HMAC_CTX_cleanup(&hctx); memcpy(p, digtmp, cplen); for(j = 1; j < iter; j++) { - HMAC(digest, pass, passlen, - digtmp, mdlen, digtmp, NULL); + if (!HMAC_CTX_copy(&hctx, &hctx_tpl)) + { + HMAC_CTX_cleanup(&hctx_tpl); + return 0; + } + if (!HMAC_Update(&hctx, digtmp, mdlen) + || !HMAC_Final(&hctx, digtmp, NULL)) + { + HMAC_CTX_cleanup(&hctx_tpl); + HMAC_CTX_cleanup(&hctx); + return 0; + } + HMAC_CTX_cleanup(&hctx); for(k = 0; k < cplen; k++) p[k] ^= digtmp[k]; } @@ -126,7 +153,7 @@ int PKCS5_PBKDF2_HMAC(const char *pass, int passlen, i++; p+= cplen; } - HMAC_CTX_cleanup(&hctx); + HMAC_CTX_cleanup(&hctx_tpl); #ifdef DEBUG_PKCS5V2 fprintf(stderr, "Password:\n"); h__dump (pass, passlen); @@ -168,27 +195,24 @@ int PKCS5_v2_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, ASN1_TYPE *param, const EVP_CIPHER *c, const EVP_MD *md, int en_de) { - unsigned char *salt, key[EVP_MAX_KEY_LENGTH]; const unsigned char *pbuf; - int saltlen, iter, plen; - unsigned int keylen; + int plen; PBE2PARAM *pbe2 = NULL; const EVP_CIPHER *cipher; - PBKDF2PARAM *kdf = NULL; - const EVP_MD *prfmd; - int prf_nid, hmac_md_nid; + + int rv = 0; if (param == NULL || param->type != V_ASN1_SEQUENCE || param->value.sequence == NULL) { EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN,EVP_R_DECODE_ERROR); - return 0; + goto err; } pbuf = param->value.sequence->data; plen = param->value.sequence->length; if(!(pbe2 = d2i_PBE2PARAM(NULL, &pbuf, plen))) { EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN,EVP_R_DECODE_ERROR); - return 0; + goto err; } /* See if we recognise the key derivation function */ @@ -211,38 +235,63 @@ int PKCS5_v2_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, } /* Fixup cipher based on AlgorithmIdentifier */ - EVP_CipherInit_ex(ctx, cipher, NULL, NULL, NULL, en_de); + if (!EVP_CipherInit_ex(ctx, cipher, NULL, NULL, NULL, en_de)) + goto err; if(EVP_CIPHER_asn1_to_param(ctx, pbe2->encryption->parameter) < 0) { EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN, EVP_R_CIPHER_PARAMETER_ERROR); goto err; } + rv = PKCS5_v2_PBKDF2_keyivgen(ctx, pass, passlen, + pbe2->keyfunc->parameter, c, md, en_de); + err: + PBE2PARAM_free(pbe2); + return rv; +} + +int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, + const EVP_CIPHER *c, const EVP_MD *md, int en_de) +{ + unsigned char *salt, key[EVP_MAX_KEY_LENGTH]; + const unsigned char *pbuf; + int saltlen, iter, plen; + int rv = 0; + unsigned int keylen = 0; + int prf_nid, hmac_md_nid; + PBKDF2PARAM *kdf = NULL; + const EVP_MD *prfmd; + + if (EVP_CIPHER_CTX_cipher(ctx) == NULL) + { + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN,EVP_R_NO_CIPHER_SET); + goto err; + } keylen = EVP_CIPHER_CTX_key_length(ctx); OPENSSL_assert(keylen <= sizeof key); - /* Now decode key derivation function */ + /* Decode parameter */ - if(!pbe2->keyfunc->parameter || - (pbe2->keyfunc->parameter->type != V_ASN1_SEQUENCE)) + if(!param || (param->type != V_ASN1_SEQUENCE)) { - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN,EVP_R_DECODE_ERROR); + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN,EVP_R_DECODE_ERROR); goto err; } - pbuf = pbe2->keyfunc->parameter->value.sequence->data; - plen = pbe2->keyfunc->parameter->value.sequence->length; + pbuf = param->value.sequence->data; + plen = param->value.sequence->length; + if(!(kdf = d2i_PBKDF2PARAM(NULL, &pbuf, plen)) ) { - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN,EVP_R_DECODE_ERROR); + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN,EVP_R_DECODE_ERROR); goto err; } - PBE2PARAM_free(pbe2); - pbe2 = NULL; + keylen = EVP_CIPHER_CTX_key_length(ctx); /* Now check the parameters of the kdf */ if(kdf->keylength && (ASN1_INTEGER_get(kdf->keylength) != (int)keylen)){ - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN, + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN, EVP_R_UNSUPPORTED_KEYLENGTH); goto err; } @@ -254,19 +303,19 @@ int PKCS5_v2_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, if (!EVP_PBE_find(EVP_PBE_TYPE_PRF, prf_nid, NULL, &hmac_md_nid, 0)) { - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN, EVP_R_UNSUPPORTED_PRF); + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN, EVP_R_UNSUPPORTED_PRF); goto err; } prfmd = EVP_get_digestbynid(hmac_md_nid); if (prfmd == NULL) { - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN, EVP_R_UNSUPPORTED_PRF); + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN, EVP_R_UNSUPPORTED_PRF); goto err; } if(kdf->salt->type != V_ASN1_OCTET_STRING) { - EVPerr(EVP_F_PKCS5_V2_PBE_KEYIVGEN, + EVPerr(EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN, EVP_R_UNSUPPORTED_SALT_TYPE); goto err; } @@ -278,15 +327,11 @@ int PKCS5_v2_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, if(!PKCS5_PBKDF2_HMAC(pass, passlen, salt, saltlen, iter, prfmd, keylen, key)) goto err; - EVP_CipherInit_ex(ctx, NULL, NULL, key, NULL, en_de); - OPENSSL_cleanse(key, keylen); - PBKDF2PARAM_free(kdf); - return 1; - + rv = EVP_CipherInit_ex(ctx, NULL, NULL, key, NULL, en_de); err: - PBE2PARAM_free(pbe2); + OPENSSL_cleanse(key, keylen); PBKDF2PARAM_free(kdf); - return 0; + return rv; } #ifdef DEBUG_PKCS5V2 diff --git a/app/openssl/crypto/evp/p_lib.c b/app/openssl/crypto/evp/p_lib.c index e26ccd0d..bd1977d7 100644 --- a/app/openssl/crypto/evp/p_lib.c +++ b/app/openssl/crypto/evp/p_lib.c @@ -200,6 +200,12 @@ EVP_PKEY *EVP_PKEY_new(void) return(ret); } +EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey) + { + CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY); + return pkey; + } + /* Setup a public key ASN1 method and ENGINE from a NID or a string. * If pkey is NULL just return 1 or 0 if the algorithm exists. */ diff --git a/app/openssl/crypto/evp/p_open.c b/app/openssl/crypto/evp/p_open.c index 53a59a29..c748fbea 100644 --- a/app/openssl/crypto/evp/p_open.c +++ b/app/openssl/crypto/evp/p_open.c @@ -115,7 +115,8 @@ int EVP_OpenFinal(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int i; i=EVP_DecryptFinal_ex(ctx,out,outl); - EVP_DecryptInit_ex(ctx,NULL,NULL,NULL,NULL); + if (i) + i = EVP_DecryptInit_ex(ctx,NULL,NULL,NULL,NULL); return(i); } #else /* !OPENSSL_NO_RSA */ diff --git a/app/openssl/crypto/evp/p_seal.c b/app/openssl/crypto/evp/p_seal.c index d8324526..e5919b0f 100644 --- a/app/openssl/crypto/evp/p_seal.c +++ b/app/openssl/crypto/evp/p_seal.c @@ -110,6 +110,7 @@ int EVP_SealFinal(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) { int i; i = EVP_EncryptFinal_ex(ctx,out,outl); - EVP_EncryptInit_ex(ctx,NULL,NULL,NULL,NULL); + if (i) + i = EVP_EncryptInit_ex(ctx,NULL,NULL,NULL,NULL); return i; } diff --git a/app/openssl/crypto/evp/p_sign.c b/app/openssl/crypto/evp/p_sign.c index bb893f5b..8afb6643 100644 --- a/app/openssl/crypto/evp/p_sign.c +++ b/app/openssl/crypto/evp/p_sign.c @@ -80,18 +80,20 @@ int EVP_SignFinal(EVP_MD_CTX *ctx, unsigned char *sigret, unsigned int *siglen, { unsigned char m[EVP_MAX_MD_SIZE]; unsigned int m_len; - int i,ok=0,v; + int i = 0,ok = 0,v; EVP_MD_CTX tmp_ctx; + EVP_PKEY_CTX *pkctx = NULL; *siglen=0; EVP_MD_CTX_init(&tmp_ctx); - EVP_MD_CTX_copy_ex(&tmp_ctx,ctx); - EVP_DigestFinal_ex(&tmp_ctx,&(m[0]),&m_len); + if (!EVP_MD_CTX_copy_ex(&tmp_ctx,ctx)) + goto err; + if (!EVP_DigestFinal_ex(&tmp_ctx,&(m[0]),&m_len)) + goto err; EVP_MD_CTX_cleanup(&tmp_ctx); if (ctx->digest->flags & EVP_MD_FLAG_PKEY_METHOD_SIGNATURE) { - EVP_PKEY_CTX *pkctx = NULL; size_t sltmp = (size_t)EVP_PKEY_size(pkey); i = 0; pkctx = EVP_PKEY_CTX_new(pkey, NULL); diff --git a/app/openssl/crypto/evp/p_verify.c b/app/openssl/crypto/evp/p_verify.c index 41d4b671..c66d63cc 100644 --- a/app/openssl/crypto/evp/p_verify.c +++ b/app/openssl/crypto/evp/p_verify.c @@ -67,17 +67,19 @@ int EVP_VerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sigbuf, { unsigned char m[EVP_MAX_MD_SIZE]; unsigned int m_len; - int i,ok=0,v; + int i = 0,ok = 0,v; EVP_MD_CTX tmp_ctx; + EVP_PKEY_CTX *pkctx = NULL; EVP_MD_CTX_init(&tmp_ctx); - EVP_MD_CTX_copy_ex(&tmp_ctx,ctx); - EVP_DigestFinal_ex(&tmp_ctx,&(m[0]),&m_len); + if (!EVP_MD_CTX_copy_ex(&tmp_ctx,ctx)) + goto err; + if (!EVP_DigestFinal_ex(&tmp_ctx,&(m[0]),&m_len)) + goto err; EVP_MD_CTX_cleanup(&tmp_ctx); if (ctx->digest->flags & EVP_MD_FLAG_PKEY_METHOD_SIGNATURE) { - EVP_PKEY_CTX *pkctx = NULL; i = -1; pkctx = EVP_PKEY_CTX_new(pkey, NULL); if (!pkctx) diff --git a/app/openssl/crypto/evp/pmeth_gn.c b/app/openssl/crypto/evp/pmeth_gn.c index 5d74161a..4651c813 100644 --- a/app/openssl/crypto/evp/pmeth_gn.c +++ b/app/openssl/crypto/evp/pmeth_gn.c @@ -199,7 +199,7 @@ int EVP_PKEY_CTX_get_keygen_info(EVP_PKEY_CTX *ctx, int idx) } EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e, - unsigned char *key, int keylen) + const unsigned char *key, int keylen) { EVP_PKEY_CTX *mac_ctx = NULL; EVP_PKEY *mac_key = NULL; @@ -209,7 +209,8 @@ EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e, if (EVP_PKEY_keygen_init(mac_ctx) <= 0) goto merr; if (EVP_PKEY_CTX_ctrl(mac_ctx, -1, EVP_PKEY_OP_KEYGEN, - EVP_PKEY_CTRL_SET_MAC_KEY, keylen, key) <= 0) + EVP_PKEY_CTRL_SET_MAC_KEY, + keylen, (void *)key) <= 0) goto merr; if (EVP_PKEY_keygen(mac_ctx, &mac_key) <= 0) goto merr; diff --git a/app/openssl/crypto/evp/pmeth_lib.c b/app/openssl/crypto/evp/pmeth_lib.c index 5481d4b8..acfa7b6f 100644 --- a/app/openssl/crypto/evp/pmeth_lib.c +++ b/app/openssl/crypto/evp/pmeth_lib.c @@ -73,7 +73,7 @@ DECLARE_STACK_OF(EVP_PKEY_METHOD) STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL; extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth; -extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth; +extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth, cmac_pkey_meth; static const EVP_PKEY_METHOD *standard_methods[] = { @@ -90,6 +90,7 @@ static const EVP_PKEY_METHOD *standard_methods[] = &ec_pkey_meth, #endif &hmac_pkey_meth, + &cmac_pkey_meth }; DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *, @@ -203,6 +204,8 @@ EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags) if (!pmeth) return NULL; + memset(pmeth, 0, sizeof(EVP_PKEY_METHOD)); + pmeth->pkey_id = id; pmeth->flags = flags | EVP_PKEY_FLAG_DYNAMIC; @@ -235,6 +238,56 @@ EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags) return pmeth; } +void EVP_PKEY_meth_get0_info(int *ppkey_id, int *pflags, + const EVP_PKEY_METHOD *meth) + { + if (ppkey_id) + *ppkey_id = meth->pkey_id; + if (pflags) + *pflags = meth->flags; + } + +void EVP_PKEY_meth_copy(EVP_PKEY_METHOD *dst, const EVP_PKEY_METHOD *src) + { + + dst->init = src->init; + dst->copy = src->copy; + dst->cleanup = src->cleanup; + + dst->paramgen_init = src->paramgen_init; + dst->paramgen = src->paramgen; + + dst->keygen_init = src->keygen_init; + dst->keygen = src->keygen; + + dst->sign_init = src->sign_init; + dst->sign = src->sign; + + dst->verify_init = src->verify_init; + dst->verify = src->verify; + + dst->verify_recover_init = src->verify_recover_init; + dst->verify_recover = src->verify_recover; + + dst->signctx_init = src->signctx_init; + dst->signctx = src->signctx; + + dst->verifyctx_init = src->verifyctx_init; + dst->verifyctx = src->verifyctx; + + dst->encrypt_init = src->encrypt_init; + dst->encrypt = src->encrypt; + + dst->decrypt_init = src->decrypt_init; + dst->decrypt = src->decrypt; + + dst->derive_init = src->derive_init; + dst->derive = src->derive; + + dst->ctrl = src->ctrl; + dst->ctrl_str = src->ctrl_str; + } + void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth) { if (pmeth && (pmeth->flags & EVP_PKEY_FLAG_DYNAMIC)) diff --git a/app/openssl/crypto/hmac/hm_ameth.c b/app/openssl/crypto/hmac/hm_ameth.c index 6d8a8914..e03f24ae 100644 --- a/app/openssl/crypto/hmac/hm_ameth.c +++ b/app/openssl/crypto/hmac/hm_ameth.c @@ -153,7 +153,7 @@ const EVP_PKEY_ASN1_METHOD hmac_asn1_meth = hmac_size, 0, - 0,0,0,0,0,0, + 0,0,0,0,0,0,0, hmac_key_free, hmac_pkey_ctrl, diff --git a/app/openssl/crypto/hmac/hm_pmeth.c b/app/openssl/crypto/hmac/hm_pmeth.c index 71e8567a..0daa4451 100644 --- a/app/openssl/crypto/hmac/hm_pmeth.c +++ b/app/openssl/crypto/hmac/hm_pmeth.c @@ -100,7 +100,8 @@ static int pkey_hmac_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) dctx = dst->data; dctx->md = sctx->md; HMAC_CTX_init(&dctx->ctx); - HMAC_CTX_copy(&dctx->ctx, &sctx->ctx); + if (!HMAC_CTX_copy(&dctx->ctx, &sctx->ctx)) + return 0; if (sctx->ktmp.data) { if (!ASN1_OCTET_STRING_set(&dctx->ktmp, @@ -141,7 +142,8 @@ static int pkey_hmac_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) static int int_update(EVP_MD_CTX *ctx,const void *data,size_t count) { HMAC_PKEY_CTX *hctx = ctx->pctx->data; - HMAC_Update(&hctx->ctx, data, count); + if (!HMAC_Update(&hctx->ctx, data, count)) + return 0; return 1; } @@ -167,7 +169,8 @@ static int hmac_signctx(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, if (!sig) return 1; - HMAC_Final(&hctx->ctx, sig, &hlen); + if (!HMAC_Final(&hctx->ctx, sig, &hlen)) + return 0; *siglen = (size_t)hlen; return 1; } @@ -192,8 +195,9 @@ static int pkey_hmac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) case EVP_PKEY_CTRL_DIGESTINIT: key = (ASN1_OCTET_STRING *)ctx->pkey->pkey.ptr; - HMAC_Init_ex(&hctx->ctx, key->data, key->length, hctx->md, - ctx->engine); + if (!HMAC_Init_ex(&hctx->ctx, key->data, key->length, hctx->md, + ctx->engine)) + return 0; break; default: diff --git a/app/openssl/crypto/hmac/hmac.c b/app/openssl/crypto/hmac/hmac.c index 6c98fc43..ba27cbf5 100644 --- a/app/openssl/crypto/hmac/hmac.c +++ b/app/openssl/crypto/hmac/hmac.c @@ -61,12 +61,34 @@ #include "cryptlib.h" #include +#ifdef OPENSSL_FIPS +#include +#endif + int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, const EVP_MD *md, ENGINE *impl) { int i,j,reset=0; unsigned char pad[HMAC_MAX_MD_CBLOCK]; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + { + /* If we have an ENGINE need to allow non FIPS */ + if ((impl || ctx->i_ctx.engine) + && !(ctx->i_ctx.flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW)) + { + EVPerr(EVP_F_HMAC_INIT_EX, EVP_R_DISABLED_FOR_FIPS); + return 0; + } + /* Other algorithm blocking will be done in FIPS_cmac_init, + * via FIPS_hmac_init_ex(). + */ + if (!impl && !ctx->i_ctx.engine) + return FIPS_hmac_init_ex(ctx, key, len, md, NULL); + } +#endif + if (md != NULL) { reset=1; @@ -133,6 +155,10 @@ int HMAC_Init(HMAC_CTX *ctx, const void *key, int len, const EVP_MD *md) int HMAC_Update(HMAC_CTX *ctx, const unsigned char *data, size_t len) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->i_ctx.engine) + return FIPS_hmac_update(ctx, data, len); +#endif return EVP_DigestUpdate(&ctx->md_ctx,data,len); } @@ -140,6 +166,10 @@ int HMAC_Final(HMAC_CTX *ctx, unsigned char *md, unsigned int *len) { unsigned int i; unsigned char buf[EVP_MAX_MD_SIZE]; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->i_ctx.engine) + return FIPS_hmac_final(ctx, md, len); +#endif if (!EVP_DigestFinal_ex(&ctx->md_ctx,buf,&i)) goto err; @@ -179,6 +209,13 @@ int HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx) void HMAC_CTX_cleanup(HMAC_CTX *ctx) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !ctx->i_ctx.engine) + { + FIPS_hmac_ctx_cleanup(ctx); + return; + } +#endif EVP_MD_CTX_cleanup(&ctx->i_ctx); EVP_MD_CTX_cleanup(&ctx->o_ctx); EVP_MD_CTX_cleanup(&ctx->md_ctx); diff --git a/app/openssl/crypto/ia64cpuid.S b/app/openssl/crypto/ia64cpuid.S index d705fff7..7832b9b6 100644 --- a/app/openssl/crypto/ia64cpuid.S +++ b/app/openssl/crypto/ia64cpuid.S @@ -26,7 +26,7 @@ OPENSSL_atomic_add: { .mii; mov ar.ccv=r2 add r8=r2,r33 mov r3=r2 };; -{ .mmi; mf +{ .mmi; mf;; cmpxchg4.acq r2=[r32],r8,ar.ccv nop.i 0 };; { .mib; cmp.ne p6,p0=r2,r3 diff --git a/app/openssl/crypto/md4/md4.h b/app/openssl/crypto/md4/md4.h index c3ed9b3f..a55368a7 100644 --- a/app/openssl/crypto/md4/md4.h +++ b/app/openssl/crypto/md4/md4.h @@ -105,6 +105,9 @@ typedef struct MD4state_st unsigned int num; } MD4_CTX; +#ifdef OPENSSL_FIPS +int private_MD4_Init(MD4_CTX *c); +#endif int MD4_Init(MD4_CTX *c); int MD4_Update(MD4_CTX *c, const void *data, size_t len); int MD4_Final(unsigned char *md, MD4_CTX *c); diff --git a/app/openssl/crypto/md4/md4_dgst.c b/app/openssl/crypto/md4/md4_dgst.c index e0c42e85..b5b165b0 100644 --- a/app/openssl/crypto/md4/md4_dgst.c +++ b/app/openssl/crypto/md4/md4_dgst.c @@ -57,8 +57,9 @@ */ #include -#include "md4_locl.h" #include +#include +#include "md4_locl.h" const char MD4_version[]="MD4" OPENSSL_VERSION_PTEXT; @@ -70,7 +71,7 @@ const char MD4_version[]="MD4" OPENSSL_VERSION_PTEXT; #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L -int MD4_Init(MD4_CTX *c) +fips_md_init(MD4) { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; @@ -105,22 +106,23 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num) for (;num--;) { - HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; + (void)HOST_c2l(data,l); X( 0)=l; + (void)HOST_c2l(data,l); X( 1)=l; /* Round 0 */ - R0(A,B,C,D,X( 0), 3,0); HOST_c2l(data,l); X( 2)=l; - R0(D,A,B,C,X( 1), 7,0); HOST_c2l(data,l); X( 3)=l; - R0(C,D,A,B,X( 2),11,0); HOST_c2l(data,l); X( 4)=l; - R0(B,C,D,A,X( 3),19,0); HOST_c2l(data,l); X( 5)=l; - R0(A,B,C,D,X( 4), 3,0); HOST_c2l(data,l); X( 6)=l; - R0(D,A,B,C,X( 5), 7,0); HOST_c2l(data,l); X( 7)=l; - R0(C,D,A,B,X( 6),11,0); HOST_c2l(data,l); X( 8)=l; - R0(B,C,D,A,X( 7),19,0); HOST_c2l(data,l); X( 9)=l; - R0(A,B,C,D,X( 8), 3,0); HOST_c2l(data,l); X(10)=l; - R0(D,A,B,C,X( 9), 7,0); HOST_c2l(data,l); X(11)=l; - R0(C,D,A,B,X(10),11,0); HOST_c2l(data,l); X(12)=l; - R0(B,C,D,A,X(11),19,0); HOST_c2l(data,l); X(13)=l; - R0(A,B,C,D,X(12), 3,0); HOST_c2l(data,l); X(14)=l; - R0(D,A,B,C,X(13), 7,0); HOST_c2l(data,l); X(15)=l; + R0(A,B,C,D,X( 0), 3,0); (void)HOST_c2l(data,l); X( 2)=l; + R0(D,A,B,C,X( 1), 7,0); (void)HOST_c2l(data,l); X( 3)=l; + R0(C,D,A,B,X( 2),11,0); (void)HOST_c2l(data,l); X( 4)=l; + R0(B,C,D,A,X( 3),19,0); (void)HOST_c2l(data,l); X( 5)=l; + R0(A,B,C,D,X( 4), 3,0); (void)HOST_c2l(data,l); X( 6)=l; + R0(D,A,B,C,X( 5), 7,0); (void)HOST_c2l(data,l); X( 7)=l; + R0(C,D,A,B,X( 6),11,0); (void)HOST_c2l(data,l); X( 8)=l; + R0(B,C,D,A,X( 7),19,0); (void)HOST_c2l(data,l); X( 9)=l; + R0(A,B,C,D,X( 8), 3,0); (void)HOST_c2l(data,l); X(10)=l; + R0(D,A,B,C,X( 9), 7,0); (void)HOST_c2l(data,l); X(11)=l; + R0(C,D,A,B,X(10),11,0); (void)HOST_c2l(data,l); X(12)=l; + R0(B,C,D,A,X(11),19,0); (void)HOST_c2l(data,l); X(13)=l; + R0(A,B,C,D,X(12), 3,0); (void)HOST_c2l(data,l); X(14)=l; + R0(D,A,B,C,X(13), 7,0); (void)HOST_c2l(data,l); X(15)=l; R0(C,D,A,B,X(14),11,0); R0(B,C,D,A,X(15),19,0); /* Round 1 */ diff --git a/app/openssl/crypto/md4/md4_locl.h b/app/openssl/crypto/md4/md4_locl.h index c8085b0e..99c3e500 100644 --- a/app/openssl/crypto/md4/md4_locl.h +++ b/app/openssl/crypto/md4/md4_locl.h @@ -77,10 +77,10 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num); #define HASH_FINAL MD4_Final #define HASH_MAKE_STRING(c,s) do { \ unsigned long ll; \ - ll=(c)->A; HOST_l2c(ll,(s)); \ - ll=(c)->B; HOST_l2c(ll,(s)); \ - ll=(c)->C; HOST_l2c(ll,(s)); \ - ll=(c)->D; HOST_l2c(ll,(s)); \ + ll=(c)->A; (void)HOST_l2c(ll,(s)); \ + ll=(c)->B; (void)HOST_l2c(ll,(s)); \ + ll=(c)->C; (void)HOST_l2c(ll,(s)); \ + ll=(c)->D; (void)HOST_l2c(ll,(s)); \ } while (0) #define HASH_BLOCK_DATA_ORDER md4_block_data_order diff --git a/app/openssl/crypto/md5/asm/md5-586.S b/app/openssl/crypto/md5/asm/md5-586.S new file mode 100644 index 00000000..23e4de7d --- /dev/null +++ b/app/openssl/crypto/md5/asm/md5-586.S @@ -0,0 +1,679 @@ +.file "crypto/md5/asm/md5-586.s" +.text +.globl md5_block_asm_data_order +.type md5_block_asm_data_order,@function +.align 16 +md5_block_asm_data_order: +.L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +.L000start: + + + movl %ecx,%edi + movl (%esi),%ebp + + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + + + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae .L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin diff --git a/app/openssl/crypto/md5/asm/md5-x86_64.S b/app/openssl/crypto/md5/asm/md5-x86_64.S new file mode 100644 index 00000000..235d5e4e --- /dev/null +++ b/app/openssl/crypto/md5/asm/md5-x86_64.S @@ -0,0 +1,668 @@ +.text +.align 16 + +.globl md5_block_asm_data_order +.type md5_block_asm_data_order,@function +md5_block_asm_data_order: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r14 + pushq %r15 +.Lprologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je .Lend + + +.Lloop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb .Lloop + + +.Lend: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r12 + movq 24(%rsp),%rbx + movq 32(%rsp),%rbp + addq $40,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size md5_block_asm_data_order,.-md5_block_asm_data_order diff --git a/app/openssl/crypto/md5/asm/md5-x86_64.pl b/app/openssl/crypto/md5/asm/md5-x86_64.pl index 86788543..f11224d1 100755 --- a/app/openssl/crypto/md5/asm/md5-x86_64.pl +++ b/app/openssl/crypto/md5/asm/md5-x86_64.pl @@ -120,7 +120,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; die "can't locate x86_64-xlate.pl"; no warnings qw(uninitialized); -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $code .= < #include "md5_locl.h" #include +#include const char MD5_version[]="MD5" OPENSSL_VERSION_PTEXT; @@ -70,7 +71,7 @@ const char MD5_version[]="MD5" OPENSSL_VERSION_PTEXT; #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L -int MD5_Init(MD5_CTX *c) +fips_md_init(MD5) { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; diff --git a/app/openssl/crypto/md5/md5_locl.h b/app/openssl/crypto/md5/md5_locl.h index 968d5779..74d63d1f 100644 --- a/app/openssl/crypto/md5/md5_locl.h +++ b/app/openssl/crypto/md5/md5_locl.h @@ -86,10 +86,10 @@ void md5_block_data_order (MD5_CTX *c, const void *p,size_t num); #define HASH_FINAL MD5_Final #define HASH_MAKE_STRING(c,s) do { \ unsigned long ll; \ - ll=(c)->A; HOST_l2c(ll,(s)); \ - ll=(c)->B; HOST_l2c(ll,(s)); \ - ll=(c)->C; HOST_l2c(ll,(s)); \ - ll=(c)->D; HOST_l2c(ll,(s)); \ + ll=(c)->A; (void)HOST_l2c(ll,(s)); \ + ll=(c)->B; (void)HOST_l2c(ll,(s)); \ + ll=(c)->C; (void)HOST_l2c(ll,(s)); \ + ll=(c)->D; (void)HOST_l2c(ll,(s)); \ } while (0) #define HASH_BLOCK_DATA_ORDER md5_block_data_order diff --git a/app/openssl/crypto/mdc2/mdc2.h b/app/openssl/crypto/mdc2/mdc2.h index 72778a52..f3e8e579 100644 --- a/app/openssl/crypto/mdc2/mdc2.h +++ b/app/openssl/crypto/mdc2/mdc2.h @@ -81,6 +81,9 @@ typedef struct mdc2_ctx_st } MDC2_CTX; +#ifdef OPENSSL_FIPS +int private_MDC2_Init(MDC2_CTX *c); +#endif int MDC2_Init(MDC2_CTX *c); int MDC2_Update(MDC2_CTX *c, const unsigned char *data, size_t len); int MDC2_Final(unsigned char *md, MDC2_CTX *c); diff --git a/app/openssl/crypto/mdc2/mdc2dgst.c b/app/openssl/crypto/mdc2/mdc2dgst.c index 4aa406ed..d66ed6a1 100644 --- a/app/openssl/crypto/mdc2/mdc2dgst.c +++ b/app/openssl/crypto/mdc2/mdc2dgst.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,7 @@ *((c)++)=(unsigned char)(((l)>>24L)&0xff)) static void mdc2_body(MDC2_CTX *c, const unsigned char *in, size_t len); -int MDC2_Init(MDC2_CTX *c) +fips_md_init(MDC2) { c->num=0; c->pad_type=1; diff --git a/app/openssl/crypto/mem.c b/app/openssl/crypto/mem.c index 6f80dd33..1cc62eaf 100644 --- a/app/openssl/crypto/mem.c +++ b/app/openssl/crypto/mem.c @@ -121,10 +121,11 @@ static void (*set_debug_options_func)(long) = NULL; static long (*get_debug_options_func)(void) = NULL; #endif - int CRYPTO_set_mem_functions(void *(*m)(size_t), void *(*r)(void *, size_t), void (*f)(void *)) { + /* Dummy call just to ensure OPENSSL_init() gets linked in */ + OPENSSL_init(); if (!allow_customize) return 0; if ((m == 0) || (r == 0) || (f == 0)) @@ -186,6 +187,7 @@ int CRYPTO_set_mem_debug_functions(void (*m)(void *,int,const char *,int,int), { if (!allow_customize_debug) return 0; + OPENSSL_init(); malloc_debug_func=m; realloc_debug_func=r; free_debug_func=f; @@ -361,6 +363,10 @@ void *CRYPTO_realloc_clean(void *str, int old_len, int num, const char *file, if (num <= 0) return NULL; + /* We don't support shrinking the buffer. Note the memcpy that copies + * |old_len| bytes to the new buffer, below. */ + if (num < old_len) return NULL; + if (realloc_debug_func != NULL) realloc_debug_func(str, NULL, num, file, line, 0); ret=malloc_ex_func(num,file,line); diff --git a/app/openssl/crypto/modes/asm/ghash-alpha.pl b/app/openssl/crypto/modes/asm/ghash-alpha.pl new file mode 100644 index 00000000..aa360293 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-alpha.pl @@ -0,0 +1,460 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# March 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+128 bytes shared table]. Even though +# loops are aggressively modulo-scheduled in respect to references to +# Htbl and Z.hi updates for 8 cycles per byte, measured performance is +# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic +# scheduling "glitch," because uprofile(1) indicates uniform sample +# distribution, as if all instruction bundles execute in 1.5 cycles. +# Meaning that it could have been even faster, yet 12 cycles is ~60% +# better than gcc-generated code and ~80% than code generated by vendor +# compiler. + +$cnt="v0"; # $0 +$t0="t0"; +$t1="t1"; +$t2="t2"; +$Thi0="t3"; # $4 +$Tlo0="t4"; +$Thi1="t5"; +$Tlo1="t6"; +$rem="t7"; # $8 +################# +$Xi="a0"; # $16, input argument block +$Htbl="a1"; +$inp="a2"; +$len="a3"; +$nlo="a4"; # $20 +$nhi="a5"; +$Zhi="t8"; +$Zlo="t9"; +$Xhi="t10"; # $24 +$Xlo="t11"; +$remp="t12"; +$rem_4bit="AT"; # $28 + +{ my $N; + sub loop() { + + $N++; +$code.=<<___; +.align 4 + extbl $Xlo,7,$nlo + and $nlo,0xf0,$nhi + sll $nlo,4,$nlo + and $nlo,0xf0,$nlo + + addq $nlo,$Htbl,$nlo + ldq $Zlo,8($nlo) + addq $nhi,$Htbl,$nhi + ldq $Zhi,0($nlo) + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + lda $cnt,6(zero) + extbl $Xlo,6,$nlo + + ldq $Tlo1,8($nhi) + s8addq $remp,$rem_4bit,$remp + ldq $Thi1,0($nhi) + srl $Zlo,4,$Zlo + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $t0,$Zlo,$Zlo + and $nlo,0xf0,$nhi + + xor $Tlo1,$Zlo,$Zlo + sll $nlo,4,$nlo + xor $Thi1,$Zhi,$Zhi + and $nlo,0xf0,$nlo + + addq $nlo,$Htbl,$nlo + ldq $Tlo0,8($nlo) + addq $nhi,$Htbl,$nhi + ldq $Thi0,0($nlo) + +.Looplo$N: + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + subq $cnt,1,$cnt + srl $Zlo,4,$Zlo + + ldq $Tlo1,8($nhi) + xor $rem,$Zhi,$Zhi + ldq $Thi1,0($nhi) + s8addq $remp,$rem_4bit,$remp + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $t0,$Zlo,$Zlo + extbl $Xlo,$cnt,$nlo + + and $nlo,0xf0,$nhi + xor $Thi0,$Zhi,$Zhi + xor $Tlo0,$Zlo,$Zlo + sll $nlo,4,$nlo + + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + and $nlo,0xf0,$nlo + srl $Zlo,4,$Zlo + + s8addq $remp,$rem_4bit,$remp + xor $rem,$Zhi,$Zhi + addq $nlo,$Htbl,$nlo + addq $nhi,$Htbl,$nhi + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + ldq $Tlo0,8($nlo) + xor $t0,$Zlo,$Zlo + + xor $Tlo1,$Zlo,$Zlo + xor $Thi1,$Zhi,$Zhi + ldq $Thi0,0($nlo) + bne $cnt,.Looplo$N + + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + lda $cnt,7(zero) + srl $Zlo,4,$Zlo + + ldq $Tlo1,8($nhi) + xor $rem,$Zhi,$Zhi + ldq $Thi1,0($nhi) + s8addq $remp,$rem_4bit,$remp + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $t0,$Zlo,$Zlo + extbl $Xhi,$cnt,$nlo + + and $nlo,0xf0,$nhi + xor $Thi0,$Zhi,$Zhi + xor $Tlo0,$Zlo,$Zlo + sll $nlo,4,$nlo + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + and $nlo,0xf0,$nlo + srl $Zlo,4,$Zlo + + s8addq $remp,$rem_4bit,$remp + xor $rem,$Zhi,$Zhi + addq $nlo,$Htbl,$nlo + addq $nhi,$Htbl,$nhi + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + ldq $Tlo0,8($nlo) + xor $t0,$Zlo,$Zlo + + xor $Tlo1,$Zlo,$Zlo + xor $Thi1,$Zhi,$Zhi + ldq $Thi0,0($nlo) + unop + + +.Loophi$N: + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + subq $cnt,1,$cnt + srl $Zlo,4,$Zlo + + ldq $Tlo1,8($nhi) + xor $rem,$Zhi,$Zhi + ldq $Thi1,0($nhi) + s8addq $remp,$rem_4bit,$remp + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $t0,$Zlo,$Zlo + extbl $Xhi,$cnt,$nlo + + and $nlo,0xf0,$nhi + xor $Thi0,$Zhi,$Zhi + xor $Tlo0,$Zlo,$Zlo + sll $nlo,4,$nlo + + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + and $nlo,0xf0,$nlo + srl $Zlo,4,$Zlo + + s8addq $remp,$rem_4bit,$remp + xor $rem,$Zhi,$Zhi + addq $nlo,$Htbl,$nlo + addq $nhi,$Htbl,$nhi + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + ldq $Tlo0,8($nlo) + xor $t0,$Zlo,$Zlo + + xor $Tlo1,$Zlo,$Zlo + xor $Thi1,$Zhi,$Zhi + ldq $Thi0,0($nlo) + bne $cnt,.Loophi$N + + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + srl $Zlo,4,$Zlo + + ldq $Tlo1,8($nhi) + xor $rem,$Zhi,$Zhi + ldq $Thi1,0($nhi) + s8addq $remp,$rem_4bit,$remp + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $t0,$Zlo,$Zlo + + xor $Tlo0,$Zlo,$Zlo + xor $Thi0,$Zhi,$Zhi + + and $Zlo,0x0f,$remp + sll $Zhi,60,$t0 + srl $Zlo,4,$Zlo + + s8addq $remp,$rem_4bit,$remp + xor $rem,$Zhi,$Zhi + + ldq $rem,0($remp) + srl $Zhi,4,$Zhi + xor $Tlo1,$Zlo,$Zlo + xor $Thi1,$Zhi,$Zhi + xor $t0,$Zlo,$Zlo + xor $rem,$Zhi,$Zhi +___ +}} + +$code=<<___; +#ifdef __linux__ +#include +#else +#include +#include +#endif + +.text + +.set noat +.set noreorder +.globl gcm_gmult_4bit +.align 4 +.ent gcm_gmult_4bit +gcm_gmult_4bit: + .frame sp,0,ra + .prologue 0 + + ldq $Xlo,8($Xi) + ldq $Xhi,0($Xi) + + bsr $t0,picmeup + nop +___ + + &loop(); + +$code.=<<___; + srl $Zlo,24,$t0 # byte swap + srl $Zlo,8,$t1 + + sll $Zlo,8,$t2 + sll $Zlo,24,$Zlo + zapnot $t0,0x11,$t0 + zapnot $t1,0x22,$t1 + + zapnot $Zlo,0x88,$Zlo + or $t0,$t1,$t0 + zapnot $t2,0x44,$t2 + + or $Zlo,$t0,$Zlo + srl $Zhi,24,$t0 + srl $Zhi,8,$t1 + + or $Zlo,$t2,$Zlo + sll $Zhi,8,$t2 + sll $Zhi,24,$Zhi + + srl $Zlo,32,$Xlo + sll $Zlo,32,$Zlo + + zapnot $t0,0x11,$t0 + zapnot $t1,0x22,$t1 + or $Zlo,$Xlo,$Xlo + + zapnot $Zhi,0x88,$Zhi + or $t0,$t1,$t0 + zapnot $t2,0x44,$t2 + + or $Zhi,$t0,$Zhi + or $Zhi,$t2,$Zhi + + srl $Zhi,32,$Xhi + sll $Zhi,32,$Zhi + + or $Zhi,$Xhi,$Xhi + stq $Xlo,8($Xi) + stq $Xhi,0($Xi) + + ret (ra) +.end gcm_gmult_4bit +___ + +$inhi="s0"; +$inlo="s1"; + +$code.=<<___; +.globl gcm_ghash_4bit +.align 4 +.ent gcm_ghash_4bit +gcm_ghash_4bit: + lda sp,-32(sp) + stq ra,0(sp) + stq s0,8(sp) + stq s1,16(sp) + .mask 0x04000600,-32 + .frame sp,32,ra + .prologue 0 + + ldq_u $inhi,0($inp) + ldq_u $Thi0,7($inp) + ldq_u $inlo,8($inp) + ldq_u $Tlo0,15($inp) + ldq $Xhi,0($Xi) + ldq $Xlo,8($Xi) + + bsr $t0,picmeup + nop + +.Louter: + extql $inhi,$inp,$inhi + extqh $Thi0,$inp,$Thi0 + or $inhi,$Thi0,$inhi + lda $inp,16($inp) + + extql $inlo,$inp,$inlo + extqh $Tlo0,$inp,$Tlo0 + or $inlo,$Tlo0,$inlo + subq $len,16,$len + + xor $Xlo,$inlo,$Xlo + xor $Xhi,$inhi,$Xhi +___ + + &loop(); + +$code.=<<___; + srl $Zlo,24,$t0 # byte swap + srl $Zlo,8,$t1 + + sll $Zlo,8,$t2 + sll $Zlo,24,$Zlo + zapnot $t0,0x11,$t0 + zapnot $t1,0x22,$t1 + + zapnot $Zlo,0x88,$Zlo + or $t0,$t1,$t0 + zapnot $t2,0x44,$t2 + + or $Zlo,$t0,$Zlo + srl $Zhi,24,$t0 + srl $Zhi,8,$t1 + + or $Zlo,$t2,$Zlo + sll $Zhi,8,$t2 + sll $Zhi,24,$Zhi + + srl $Zlo,32,$Xlo + sll $Zlo,32,$Zlo + beq $len,.Ldone + + zapnot $t0,0x11,$t0 + zapnot $t1,0x22,$t1 + or $Zlo,$Xlo,$Xlo + ldq_u $inhi,0($inp) + + zapnot $Zhi,0x88,$Zhi + or $t0,$t1,$t0 + zapnot $t2,0x44,$t2 + ldq_u $Thi0,7($inp) + + or $Zhi,$t0,$Zhi + or $Zhi,$t2,$Zhi + ldq_u $inlo,8($inp) + ldq_u $Tlo0,15($inp) + + srl $Zhi,32,$Xhi + sll $Zhi,32,$Zhi + + or $Zhi,$Xhi,$Xhi + br zero,.Louter + +.Ldone: + zapnot $t0,0x11,$t0 + zapnot $t1,0x22,$t1 + or $Zlo,$Xlo,$Xlo + + zapnot $Zhi,0x88,$Zhi + or $t0,$t1,$t0 + zapnot $t2,0x44,$t2 + + or $Zhi,$t0,$Zhi + or $Zhi,$t2,$Zhi + + srl $Zhi,32,$Xhi + sll $Zhi,32,$Zhi + + or $Zhi,$Xhi,$Xhi + + stq $Xlo,8($Xi) + stq $Xhi,0($Xi) + + .set noreorder + /*ldq ra,0(sp)*/ + ldq s0,8(sp) + ldq s1,16(sp) + lda sp,32(sp) + ret (ra) +.end gcm_ghash_4bit + +.align 4 +.ent picmeup +picmeup: + .frame sp,0,$t0 + .prologue 0 + br $rem_4bit,.Lpic +.Lpic: lda $rem_4bit,12($rem_4bit) + ret ($t0) +.end picmeup + nop +rem_4bit: + .long 0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16 + .long 0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16 + .long 0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16 + .long 0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16 +.ascii "GHASH for Alpha, CRYPTOGAMS by " +.align 4 + +___ +$output=shift and open STDOUT,">$output"; +print $code; +close STDOUT; + diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.S b/app/openssl/crypto/modes/asm/ghash-armv4.S new file mode 100644 index 00000000..d66c4cbf --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-armv4.S @@ -0,0 +1,408 @@ +#include "arm_arch.h" + +.text +.code 32 + +.type rem_4bit,%object +.align 5 +rem_4bit: +.short 0x0000,0x1C20,0x3840,0x2460 +.short 0x7080,0x6CA0,0x48C0,0x54E0 +.short 0xE100,0xFD20,0xD940,0xC560 +.short 0x9180,0x8DA0,0xA9C0,0xB5E0 +.size rem_4bit,.-rem_4bit + +.type rem_4bit_get,%function +rem_4bit_get: + sub r2,pc,#8 + sub r2,r2,#32 @ &rem_4bit + b .Lrem_4bit_got + nop +.size rem_4bit_get,.-rem_4bit_get + +.global gcm_ghash_4bit +.type gcm_ghash_4bit,%function +gcm_ghash_4bit: + sub r12,pc,#8 + add r3,r2,r3 @ r3 to point at the end + stmdb sp!,{r3-r11,lr} @ save r3/end too + sub r12,r12,#48 @ &rem_4bit + + ldmia r12,{r4-r11} @ copy rem_4bit ... + stmdb sp!,{r4-r11} @ ... to stack + + ldrb r12,[r2,#15] + ldrb r14,[r0,#15] +.Louter: + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + add r11,r1,r14 + ldrb r12,[r2,#14] + + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[sp,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + ldrb r14,[r0,#14] + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + eor r7,r7,r8,lsl#16 + +.Linner: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[sp,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrplb r12,[r2,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + ldrplb r8,[r0,r3] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r9,[sp,r14] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eorpl r12,r12,r8 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem] + bpl .Linner + + ldr r3,[sp,#32] @ re-load r3/end + add r2,r2,#16 + mov r14,r4 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + cmp r2,r3 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + ldrneb r12,[r2,#15] +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + + bne .Louter + + add sp,sp,#36 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_ghash_4bit,.-gcm_ghash_4bit + +.global gcm_gmult_4bit +.type gcm_gmult_4bit,%function +gcm_gmult_4bit: + stmdb sp!,{r4-r11,lr} + ldrb r12,[r0,#15] + b rem_4bit_get +.Lrem_4bit_got: + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + ldrb r12,[r0,#14] + + add r11,r1,r14 + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + and r14,r12,#0xf0 + eor r7,r7,r8,lsl#16 + and r12,r12,#0x0f + +.Loop: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[r2,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrplb r12,[r0,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + bpl .Loop +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_gmult_4bit,.-gcm_gmult_4bit +#if __ARM_ARCH__>=7 +.fpu neon + +.global gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + sub r1,#16 @ point at H in GCM128_CTX + vld1.64 d29,[r0,:64]!@ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d28,[r0,:64]! + vshr.u64 d5,#32 + vldmia r1,{d0-d1} @ load H + veor q12,q12 +#ifdef __ARMEL__ + vrev64.8 q14,q14 +#endif + veor q13,q13 + veor q11,q11 + mov r1,#16 + veor q10,q10 + mov r3,#16 + veor d2,d2 + vdup.8 d4,d28[0] @ broadcast lowest byte + b .Linner_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.global gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 d21,[r0,:64]! @ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d20,[r0,:64]! + vshr.u64 d5,#32 + vldmia r0,{d0-d1} @ load H + veor q12,q12 + nop +#ifdef __ARMEL__ + vrev64.8 q10,q10 +#endif +.Louter_neon: + vld1.64 d29,[r2]! @ load inp + veor q13,q13 + vld1.64 d28,[r2]! + veor q11,q11 + mov r1,#16 +#ifdef __ARMEL__ + vrev64.8 q14,q14 +#endif + veor d2,d2 + veor q14,q10 @ inp^=Xi + veor q10,q10 + vdup.8 d4,d28[0] @ broadcast lowest byte +.Linner_neon: + subs r1,r1,#1 + vmull.p8 q9,d1,d4 @ H.lo·Xi[i] + vmull.p8 q8,d0,d4 @ H.hi·Xi[i] + vext.8 q14,q12,#1 @ IN>>=8 + + veor q10,q13 @ modulo-scheduled part + vshl.i64 d22,#48 + vdup.8 d4,d28[0] @ broadcast lowest byte + veor d3,d18,d20 + + veor d21,d22 + vuzp.8 q9,q8 + vsli.8 d2,d3,#1 @ compose the "carry" byte + vext.8 q10,q12,#1 @ Z>>=8 + + vmull.p8 q11,d2,d5 @ "carry"·0xe1 + vshr.u8 d2,d3,#7 @ save Z's bottom bit + vext.8 q13,q9,q12,#1 @ Qlo>>=8 + veor q10,q8 + bne .Linner_neon + + veor q10,q13 @ modulo-scheduled artefact + vshl.i64 d22,#48 + veor d21,d22 + + @ finalization, normalize Z:Zo + vand d2,d5 @ suffices to mask the bit + vshr.u64 d3,d20,#63 + vshl.i64 q10,#1 + subs r3,#16 + vorr q10,q1 @ Z=Z:Zo<<1 + bne .Louter_neon + +#ifdef __ARMEL__ + vrev64.8 q10,q10 +#endif + sub r0,#16 + vst1.64 d21,[r0,:64]! @ write out Xi + vst1.64 d20,[r0,:64] + + .word 0xe12fff1e +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " +.align 2 diff --git a/app/openssl/crypto/modes/asm/ghash-armv4.pl b/app/openssl/crypto/modes/asm/ghash-armv4.pl new file mode 100644 index 00000000..e46f8e34 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-armv4.pl @@ -0,0 +1,429 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# April 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+32 bytes shared table]. There is no +# experimental performance data available yet. The only approximation +# that can be made at this point is based on code size. Inner loop is +# 32 instructions long and on single-issue core should execute in <40 +# cycles. Having verified that gcc 3.4 didn't unroll corresponding +# loop, this assembler loop body was found to be ~3x smaller than +# compiler-generated one... +# +# July 2010 +# +# Rescheduling for dual-issue pipeline resulted in 8.5% improvement on +# Cortex A8 core and ~25 cycles per processed byte (which was observed +# to be ~3 times faster than gcc-generated code:-) +# +# February 2011 +# +# Profiler-assisted and platform-specific optimization resulted in 7% +# improvement on Cortex A8 core and ~23.5 cycles per byte. +# +# March 2011 +# +# Add NEON implementation featuring polynomial multiplication, i.e. no +# lookup tables involved. On Cortex A8 it was measured to process one +# byte in 15 cycles or 55% faster than integer-only code. + +# ==================================================================== +# Note about "528B" variant. In ARM case it makes lesser sense to +# implement it for following reasons: +# +# - performance improvement won't be anywhere near 50%, because 128- +# bit shift operation is neatly fused with 128-bit xor here, and +# "538B" variant would eliminate only 4-5 instructions out of 32 +# in the inner loop (meaning that estimated improvement is ~15%); +# - ARM-based systems are often embedded ones and extra memory +# consumption might be unappreciated (for so little improvement); +# +# Byte order [in]dependence. ========================================= +# +# Caller is expected to maintain specific *dword* order in Htable, +# namely with *least* significant dword of 128-bit value at *lower* +# address. This differs completely from C code and has everything to +# do with ldm instruction and order in which dwords are "consumed" by +# algorithm. *Byte* order within these dwords in turn is whatever +# *native* byte order on current platform. See gcm128.c for working +# example... + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$Xi="r0"; # argument block +$Htbl="r1"; +$inp="r2"; +$len="r3"; + +$Zll="r4"; # variables +$Zlh="r5"; +$Zhl="r6"; +$Zhh="r7"; +$Tll="r8"; +$Tlh="r9"; +$Thl="r10"; +$Thh="r11"; +$nlo="r12"; +################# r13 is stack pointer +$nhi="r14"; +################# r15 is program counter + +$rem_4bit=$inp; # used in gcm_gmult_4bit +$cnt=$len; + +sub Zsmash() { + my $i=12; + my @args=@_; + for ($Zll,$Zlh,$Zhl,$Zhh) { + $code.=<<___; +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev $_,$_ + str $_,[$Xi,#$i] +#elif defined(__ARMEB__) + str $_,[$Xi,#$i] +#else + mov $Tlh,$_,lsr#8 + strb $_,[$Xi,#$i+3] + mov $Thl,$_,lsr#16 + strb $Tlh,[$Xi,#$i+2] + mov $Thh,$_,lsr#24 + strb $Thl,[$Xi,#$i+1] + strb $Thh,[$Xi,#$i] +#endif +___ + $code.="\t".shift(@args)."\n"; + $i-=4; + } +} + +$code=<<___; +#include "arm_arch.h" + +.text +.code 32 + +.type rem_4bit,%object +.align 5 +rem_4bit: +.short 0x0000,0x1C20,0x3840,0x2460 +.short 0x7080,0x6CA0,0x48C0,0x54E0 +.short 0xE100,0xFD20,0xD940,0xC560 +.short 0x9180,0x8DA0,0xA9C0,0xB5E0 +.size rem_4bit,.-rem_4bit + +.type rem_4bit_get,%function +rem_4bit_get: + sub $rem_4bit,pc,#8 + sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit + b .Lrem_4bit_got + nop +.size rem_4bit_get,.-rem_4bit_get + +.global gcm_ghash_4bit +.type gcm_ghash_4bit,%function +gcm_ghash_4bit: + sub r12,pc,#8 + add $len,$inp,$len @ $len to point at the end + stmdb sp!,{r3-r11,lr} @ save $len/end too + sub r12,r12,#48 @ &rem_4bit + + ldmia r12,{r4-r11} @ copy rem_4bit ... + stmdb sp!,{r4-r11} @ ... to stack + + ldrb $nlo,[$inp,#15] + ldrb $nhi,[$Xi,#15] +.Louter: + eor $nlo,$nlo,$nhi + and $nhi,$nlo,#0xf0 + and $nlo,$nlo,#0x0f + mov $cnt,#14 + + add $Zhh,$Htbl,$nlo,lsl#4 + ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo] + add $Thh,$Htbl,$nhi + ldrb $nlo,[$inp,#14] + + and $nhi,$Zll,#0xf @ rem + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] + add $nhi,$nhi,$nhi + eor $Zll,$Tll,$Zll,lsr#4 + ldrh $Tll,[sp,$nhi] @ rem_4bit[rem] + eor $Zll,$Zll,$Zlh,lsl#28 + ldrb $nhi,[$Xi,#14] + eor $Zlh,$Tlh,$Zlh,lsr#4 + eor $Zlh,$Zlh,$Zhl,lsl#28 + eor $Zhl,$Thl,$Zhl,lsr#4 + eor $Zhl,$Zhl,$Zhh,lsl#28 + eor $Zhh,$Thh,$Zhh,lsr#4 + eor $nlo,$nlo,$nhi + and $nhi,$nlo,#0xf0 + and $nlo,$nlo,#0x0f + eor $Zhh,$Zhh,$Tll,lsl#16 + +.Linner: + add $Thh,$Htbl,$nlo,lsl#4 + and $nlo,$Zll,#0xf @ rem + subs $cnt,$cnt,#1 + add $nlo,$nlo,$nlo + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo] + eor $Zll,$Tll,$Zll,lsr#4 + eor $Zll,$Zll,$Zlh,lsl#28 + eor $Zlh,$Tlh,$Zlh,lsr#4 + eor $Zlh,$Zlh,$Zhl,lsl#28 + ldrh $Tll,[sp,$nlo] @ rem_4bit[rem] + eor $Zhl,$Thl,$Zhl,lsr#4 + ldrplb $nlo,[$inp,$cnt] + eor $Zhl,$Zhl,$Zhh,lsl#28 + eor $Zhh,$Thh,$Zhh,lsr#4 + + add $Thh,$Htbl,$nhi + and $nhi,$Zll,#0xf @ rem + eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] + add $nhi,$nhi,$nhi + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] + eor $Zll,$Tll,$Zll,lsr#4 + ldrplb $Tll,[$Xi,$cnt] + eor $Zll,$Zll,$Zlh,lsl#28 + eor $Zlh,$Tlh,$Zlh,lsr#4 + ldrh $Tlh,[sp,$nhi] + eor $Zlh,$Zlh,$Zhl,lsl#28 + eor $Zhl,$Thl,$Zhl,lsr#4 + eor $Zhl,$Zhl,$Zhh,lsl#28 + eorpl $nlo,$nlo,$Tll + eor $Zhh,$Thh,$Zhh,lsr#4 + andpl $nhi,$nlo,#0xf0 + andpl $nlo,$nlo,#0x0f + eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem] + bpl .Linner + + ldr $len,[sp,#32] @ re-load $len/end + add $inp,$inp,#16 + mov $nhi,$Zll +___ + &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]"); +$code.=<<___; + bne .Louter + + add sp,sp,#36 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size gcm_ghash_4bit,.-gcm_ghash_4bit + +.global gcm_gmult_4bit +.type gcm_gmult_4bit,%function +gcm_gmult_4bit: + stmdb sp!,{r4-r11,lr} + ldrb $nlo,[$Xi,#15] + b rem_4bit_get +.Lrem_4bit_got: + and $nhi,$nlo,#0xf0 + and $nlo,$nlo,#0x0f + mov $cnt,#14 + + add $Zhh,$Htbl,$nlo,lsl#4 + ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo] + ldrb $nlo,[$Xi,#14] + + add $Thh,$Htbl,$nhi + and $nhi,$Zll,#0xf @ rem + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] + add $nhi,$nhi,$nhi + eor $Zll,$Tll,$Zll,lsr#4 + ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem] + eor $Zll,$Zll,$Zlh,lsl#28 + eor $Zlh,$Tlh,$Zlh,lsr#4 + eor $Zlh,$Zlh,$Zhl,lsl#28 + eor $Zhl,$Thl,$Zhl,lsr#4 + eor $Zhl,$Zhl,$Zhh,lsl#28 + eor $Zhh,$Thh,$Zhh,lsr#4 + and $nhi,$nlo,#0xf0 + eor $Zhh,$Zhh,$Tll,lsl#16 + and $nlo,$nlo,#0x0f + +.Loop: + add $Thh,$Htbl,$nlo,lsl#4 + and $nlo,$Zll,#0xf @ rem + subs $cnt,$cnt,#1 + add $nlo,$nlo,$nlo + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo] + eor $Zll,$Tll,$Zll,lsr#4 + eor $Zll,$Zll,$Zlh,lsl#28 + eor $Zlh,$Tlh,$Zlh,lsr#4 + eor $Zlh,$Zlh,$Zhl,lsl#28 + ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem] + eor $Zhl,$Thl,$Zhl,lsr#4 + ldrplb $nlo,[$Xi,$cnt] + eor $Zhl,$Zhl,$Zhh,lsl#28 + eor $Zhh,$Thh,$Zhh,lsr#4 + + add $Thh,$Htbl,$nhi + and $nhi,$Zll,#0xf @ rem + eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] + add $nhi,$nhi,$nhi + ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] + eor $Zll,$Tll,$Zll,lsr#4 + eor $Zll,$Zll,$Zlh,lsl#28 + eor $Zlh,$Tlh,$Zlh,lsr#4 + ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem] + eor $Zlh,$Zlh,$Zhl,lsl#28 + eor $Zhl,$Thl,$Zhl,lsr#4 + eor $Zhl,$Zhl,$Zhh,lsl#28 + eor $Zhh,$Thh,$Zhh,lsr#4 + andpl $nhi,$nlo,#0xf0 + andpl $nlo,$nlo,#0x0f + eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] + bpl .Loop +___ + &Zsmash(); +$code.=<<___; +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size gcm_gmult_4bit,.-gcm_gmult_4bit +___ +{ +my $cnt=$Htbl; # $Htbl is used once in the very beginning + +my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); +my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); + +# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit +# in Zo. Or should I say "top bit", because GHASH is specified in +# reverse bit order? Otherwise straightforward 128-bt H by one input +# byte multiplication and modulo-reduction, times 16. + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } +sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } + +$code.=<<___; +#if __ARM_ARCH__>=7 +.fpu neon + +.global gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + sub $Htbl,#16 @ point at H in GCM128_CTX + vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi + vmov.i32 $mod,#0xe1 @ our irreducible polynomial + vld1.64 `&Dlo("$IN")`,[$Xi,:64]! + vshr.u64 $mod,#32 + vldmia $Htbl,{$Hhi-$Hlo} @ load H + veor $zero,$zero +#ifdef __ARMEL__ + vrev64.8 $IN,$IN +#endif + veor $Qpost,$Qpost + veor $R,$R + mov $cnt,#16 + veor $Z,$Z + mov $len,#16 + veor $Zo,$Zo + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte + b .Linner_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.global gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi + vmov.i32 $mod,#0xe1 @ our irreducible polynomial + vld1.64 `&Dlo("$Z")`,[$Xi,:64]! + vshr.u64 $mod,#32 + vldmia $Xi,{$Hhi-$Hlo} @ load H + veor $zero,$zero + nop +#ifdef __ARMEL__ + vrev64.8 $Z,$Z +#endif +.Louter_neon: + vld1.64 `&Dhi($IN)`,[$inp]! @ load inp + veor $Qpost,$Qpost + vld1.64 `&Dlo($IN)`,[$inp]! + veor $R,$R + mov $cnt,#16 +#ifdef __ARMEL__ + vrev64.8 $IN,$IN +#endif + veor $Zo,$Zo + veor $IN,$Z @ inp^=Xi + veor $Z,$Z + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte +.Linner_neon: + subs $cnt,$cnt,#1 + vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i] + vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i] + vext.8 $IN,$zero,#1 @ IN>>=8 + + veor $Z,$Qpost @ modulo-scheduled part + vshl.i64 `&Dlo("$R")`,#48 + vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte + veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")` + + veor `&Dhi("$Z")`,`&Dlo("$R")` + vuzp.8 $Qlo,$Qhi + vsli.8 $Zo,$T,#1 @ compose the "carry" byte + vext.8 $Z,$zero,#1 @ Z>>=8 + + vmull.p8 $R,$Zo,$mod @ "carry"·0xe1 + vshr.u8 $Zo,$T,#7 @ save Z's bottom bit + vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8 + veor $Z,$Qhi + bne .Linner_neon + + veor $Z,$Qpost @ modulo-scheduled artefact + vshl.i64 `&Dlo("$R")`,#48 + veor `&Dhi("$Z")`,`&Dlo("$R")` + + @ finalization, normalize Z:Zo + vand $Zo,$mod @ suffices to mask the bit + vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 + vshl.i64 $Z,#1 + subs $len,#16 + vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1 + bne .Louter_neon + +#ifdef __ARMEL__ + vrev64.8 $Z,$Z +#endif + sub $Xi,#16 + vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi + vst1.64 `&Dlo("$Z")`,[$Xi,:64] + + bx lr +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +___ +} +$code.=<<___; +.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +print $code; +close STDOUT; # enforce flush diff --git a/app/openssl/crypto/modes/asm/ghash-ia64.pl b/app/openssl/crypto/modes/asm/ghash-ia64.pl new file mode 100755 index 00000000..0354c954 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-ia64.pl @@ -0,0 +1,463 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# March 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+128 bytes shared table]. Streamed +# GHASH performance was measured to be 6.67 cycles per processed byte +# on Itanium 2, which is >90% better than Microsoft compiler generated +# code. To anchor to something else sha1-ia64.pl module processes one +# byte in 5.7 cycles. On Itanium GHASH should run at ~8.5 cycles per +# byte. + +# September 2010 +# +# It was originally thought that it makes lesser sense to implement +# "528B" variant on Itanium 2 for following reason. Because number of +# functional units is naturally limited, it appeared impossible to +# implement "528B" loop in 4 cycles, only in 5. This would mean that +# theoretically performance improvement couldn't be more than 20%. +# But occasionally you prove yourself wrong:-) I figured out a way to +# fold couple of instructions and having freed yet another instruction +# slot by unrolling the loop... Resulting performance is 4.45 cycles +# per processed byte and 50% better than "256B" version. On original +# Itanium performance should remain the same as the "256B" version, +# i.e. ~8.5 cycles. + +$output=shift and (open STDOUT,">$output" or die "can't open $output: $!"); + +if ($^O eq "hpux") { + $ADDP="addp4"; + for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } +} else { $ADDP="add"; } +for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); + $big_endian=0 if (/\-DL_ENDIAN/); } +if (!defined($big_endian)) + { $big_endian=(unpack('L',pack('N',1))==1); } + +sub loop() { +my $label=shift; +my ($p16,$p17)=(shift)?("p63","p63"):("p16","p17"); # mask references to inp + +# Loop is scheduled for 6 ticks on Itanium 2 and 8 on Itanium, i.e. +# in scalable manner;-) Naturally assuming data in L1 cache... +# Special note about 'dep' instruction, which is used to construct +# &rem_4bit[Zlo&0xf]. It works, because rem_4bit is aligned at 128 +# bytes boundary and lower 7 bits of its address are guaranteed to +# be zero. +$code.=<<___; +$label: +{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8 + (p19) dep rem=Zlo,rem_4bitp,3,4 } +{ .mfi; (p19) xor Zhi=Zhi,Hhi + ($p17) xor xi[1]=xi[1],in[1] };; +{ .mfi; (p18) ld8 Hhi=[Hi[1]] + (p19) shrp Zlo=Zhi,Zlo,4 } +{ .mfi; (p19) ld8 rem=[rem] + (p18) and Hi[1]=mask0xf0,xi[2] };; +{ .mmi; ($p16) ld1 in[0]=[inp],-1 + (p18) xor Zlo=Zlo,Hlo + (p19) shr.u Zhi=Zhi,4 } +{ .mib; (p19) xor Hhi=Hhi,rem + (p18) add Hi[1]=Htbl,Hi[1] };; + +{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8 + (p18) dep rem=Zlo,rem_4bitp,3,4 } +{ .mfi; (p17) shladd Hi[0]=xi[1],4,r0 + (p18) xor Zhi=Zhi,Hhi };; +{ .mfi; (p18) ld8 Hhi=[Hi[1]] + (p18) shrp Zlo=Zhi,Zlo,4 } +{ .mfi; (p18) ld8 rem=[rem] + (p17) and Hi[0]=mask0xf0,Hi[0] };; +{ .mmi; (p16) ld1 xi[0]=[Xi],-1 + (p18) xor Zlo=Zlo,Hlo + (p18) shr.u Zhi=Zhi,4 } +{ .mib; (p18) xor Hhi=Hhi,rem + (p17) add Hi[0]=Htbl,Hi[0] + br.ctop.sptk $label };; +___ +} + +$code=<<___; +.explicit +.text + +prevfs=r2; prevlc=r3; prevpr=r8; +mask0xf0=r21; +rem=r22; rem_4bitp=r23; +Xi=r24; Htbl=r25; +inp=r26; end=r27; +Hhi=r28; Hlo=r29; +Zhi=r30; Zlo=r31; + +.align 128 +.skip 16 // aligns loop body +.global gcm_gmult_4bit# +.proc gcm_gmult_4bit# +gcm_gmult_4bit: + .prologue +{ .mmi; .save ar.pfs,prevfs + alloc prevfs=ar.pfs,2,6,0,8 + $ADDP Xi=15,in0 // &Xi[15] + mov rem_4bitp=ip } +{ .mii; $ADDP Htbl=8,in1 // &Htbl[0].lo + .save ar.lc,prevlc + mov prevlc=ar.lc + .save pr,prevpr + mov prevpr=pr };; + + .body + .rotr in[3],xi[3],Hi[2] + +{ .mib; ld1 xi[2]=[Xi],-1 // Xi[15] + mov mask0xf0=0xf0 + brp.loop.imp .Loop1,.Lend1-16};; +{ .mmi; ld1 xi[1]=[Xi],-1 // Xi[14] + };; +{ .mii; shladd Hi[1]=xi[2],4,r0 + mov pr.rot=0x7<<16 + mov ar.lc=13 };; +{ .mii; and Hi[1]=mask0xf0,Hi[1] + mov ar.ec=3 + xor Zlo=Zlo,Zlo };; +{ .mii; add Hi[1]=Htbl,Hi[1] // &Htbl[nlo].lo + add rem_4bitp=rem_4bit#-gcm_gmult_4bit#,rem_4bitp + xor Zhi=Zhi,Zhi };; +___ + &loop (".Loop1",1); +$code.=<<___; +.Lend1: +{ .mib; xor Zhi=Zhi,Hhi };; // modulo-scheduling artefact +{ .mib; mux1 Zlo=Zlo,\@rev };; +{ .mib; mux1 Zhi=Zhi,\@rev };; +{ .mmi; add Hlo=9,Xi;; // ;; is here to prevent + add Hhi=1,Xi };; // pipeline flush on Itanium +{ .mib; st8 [Hlo]=Zlo + mov pr=prevpr,0x1ffff };; +{ .mib; st8 [Hhi]=Zhi + mov ar.lc=prevlc + br.ret.sptk.many b0 };; +.endp gcm_gmult_4bit# +___ + +###################################################################### +# "528B" (well, "512B" actualy) streamed GHASH +# +$Xip="in0"; +$Htbl="in1"; +$inp="in2"; +$len="in3"; +$rem_8bit="loc0"; +$mask0xff="loc1"; +($sum,$rum) = $big_endian ? ("nop.m","nop.m") : ("sum","rum"); + +sub load_htable() { + for (my $i=0;$i<8;$i++) { + $code.=<<___; +{ .mmi; ld8 r`16+2*$i+1`=[r8],16 // Htable[$i].hi + ld8 r`16+2*$i`=[r9],16 } // Htable[$i].lo +{ .mmi; ldf8 f`32+2*$i+1`=[r10],16 // Htable[`8+$i`].hi + ldf8 f`32+2*$i`=[r11],16 // Htable[`8+$i`].lo +___ + $code.=shift if (($i+$#_)==7); + $code.="\t};;\n" + } +} + +$code.=<<___; +prevsp=r3; + +.align 32 +.skip 16 // aligns loop body +.global gcm_ghash_4bit# +.proc gcm_ghash_4bit# +gcm_ghash_4bit: + .prologue +{ .mmi; .save ar.pfs,prevfs + alloc prevfs=ar.pfs,4,2,0,0 + .vframe prevsp + mov prevsp=sp + mov $rem_8bit=ip };; + .body +{ .mfi; $ADDP r8=0+0,$Htbl + $ADDP r9=0+8,$Htbl } +{ .mfi; $ADDP r10=128+0,$Htbl + $ADDP r11=128+8,$Htbl };; +___ + &load_htable( + " $ADDP $Xip=15,$Xip", # &Xi[15] + " $ADDP $len=$len,$inp", # &inp[len] + " $ADDP $inp=15,$inp", # &inp[15] + " mov $mask0xff=0xff", + " add sp=-512,sp", + " andcm sp=sp,$mask0xff", # align stack frame + " add r14=0,sp", + " add r15=8,sp"); +$code.=<<___; +{ .mmi; $sum 1<<1 // go big-endian + add r8=256+0,sp + add r9=256+8,sp } +{ .mmi; add r10=256+128+0,sp + add r11=256+128+8,sp + add $len=-17,$len };; +___ +for($i=0;$i<8;$i++) { # generate first half of Hshr4[] +my ($rlo,$rhi)=("r".eval(16+2*$i),"r".eval(16+2*$i+1)); +$code.=<<___; +{ .mmi; st8 [r8]=$rlo,16 // Htable[$i].lo + st8 [r9]=$rhi,16 // Htable[$i].hi + shrp $rlo=$rhi,$rlo,4 }//;; +{ .mmi; stf8 [r10]=f`32+2*$i`,16 // Htable[`8+$i`].lo + stf8 [r11]=f`32+2*$i+1`,16 // Htable[`8+$i`].hi + shr.u $rhi=$rhi,4 };; +{ .mmi; st8 [r14]=$rlo,16 // Htable[$i].lo>>4 + st8 [r15]=$rhi,16 }//;; // Htable[$i].hi>>4 +___ +} +$code.=<<___; +{ .mmi; ld8 r16=[r8],16 // Htable[8].lo + ld8 r17=[r9],16 };; // Htable[8].hi +{ .mmi; ld8 r18=[r8],16 // Htable[9].lo + ld8 r19=[r9],16 } // Htable[9].hi +{ .mmi; rum 1<<5 // clear um.mfh + shrp r16=r17,r16,4 };; +___ +for($i=0;$i<6;$i++) { # generate second half of Hshr4[] +$code.=<<___; +{ .mmi; ld8 r`20+2*$i`=[r8],16 // Htable[`10+$i`].lo + ld8 r`20+2*$i+1`=[r9],16 // Htable[`10+$i`].hi + shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };; +{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4 + st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4 + shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 } +___ +} +$code.=<<___; +{ .mmi; shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };; +{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4 + st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4 + shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 } +{ .mmi; add $Htbl=256,sp // &Htable[0] + add $rem_8bit=rem_8bit#-gcm_ghash_4bit#,$rem_8bit + shr.u r`18+2*$i+1`=r`18+2*$i+1`,4 };; +{ .mmi; st8 [r14]=r`18+2*$i` // Htable[`8+$i`].lo>>4 + st8 [r15]=r`18+2*$i+1` } // Htable[`8+$i`].hi>>4 +___ + +$in="r15"; +@xi=("r16","r17"); +@rem=("r18","r19"); +($Alo,$Ahi,$Blo,$Bhi,$Zlo,$Zhi)=("r20","r21","r22","r23","r24","r25"); +($Atbl,$Btbl)=("r26","r27"); + +$code.=<<___; # (p16) +{ .mmi; ld1 $in=[$inp],-1 //(p16) *inp-- + ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- + cmp.eq p0,p6=r0,r0 };; // clear p6 +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers + +$code.=<<___; # (p16),(p17) +{ .mmi; ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- + xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] +{ .mii; ld1 $in=[$inp],-1 //(p16) *inp-- + dep $Atbl=$xi[1],$Htbl,4,4 //(p17) &Htable[nlo].lo + and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 +.align 32 +.LOOP: +{ .mmi; +(p6) st8 [$Xip]=$Zhi,13 + xor $Zlo=$Zlo,$Zlo + add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi].lo +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers + +$code.=<<___; # (p16),(p17),(p18) +{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi + ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo + xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] +{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi + dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo +{ .mfi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 + xor $Zlo=$Zlo,$Alo };; //(p18) Z.lo^=Htable[nlo].lo +{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi + ld1 $in=[$inp],-1 } //(p16) *inp-- +{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) + mov $Zhi=$Ahi //(p18) Z.hi^=Htable[nlo].hi + and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 +{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi + ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- + shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) +{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff + add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers + +for ($i=1;$i<14;$i++) { +# Above and below fragments are derived from this one by removing +# unsuitable (p??) instructions. +$code.=<<___; # (p16),(p17),(p18),(p19) +{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi + ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo + shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 +{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] + xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo + xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] +{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi + ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] + dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo +{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 + xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo + xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi +{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi + ld1 $in=[$inp],-1 //(p16) *inp-- + shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 +{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) + xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi + and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 +{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi + ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- + shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) +{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff + xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 + add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers +} + +$code.=<<___; # (p17),(p18),(p19) +{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi + ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo + shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 +{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] + xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo + xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] +{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi + ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] + dep $Atbl=$xi[1],$Htbl,4,4 };; //(p17) &Htable[nlo].lo +{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 + xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo + xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi +{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi + shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 +{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) + xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi + and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 +{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi + shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) +{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff + xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 + add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers + +$code.=<<___; # (p18),(p19) +{ .mfi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi + shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 +{ .mfi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] + xor $Zlo=$Zlo,$Blo };; //(p19) Z.lo^=Hshr4[nhi].lo +{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi + xor $Zlo=$Zlo,$Alo } //(p18) Z.lo^=Htable[nlo].lo +{ .mfi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] + xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi +{ .mfi; ld8 $Blo=[$Btbl],8 //(p18) Htable[nhi].lo,&Htable[nhi].hi + shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 +{ .mfi; shladd $rem[0]=$Zlo,4,r0 //(p18) Z.lo<<4 + xor $Zhi=$Zhi,$Ahi };; //(p18) Z.hi^=Htable[nlo].hi +{ .mfi; ld8 $Bhi=[$Btbl] //(p18) Htable[nhi].hi + shrp $Zlo=$Zhi,$Zlo,4 } //(p18) Z.lo=(Z.hi<<60)|(Z.lo>>4) +{ .mfi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff + xor $Zhi=$Zhi,$rem[1] };; //(p19) Z.hi^=rem_8bit[rem]<<48 +___ +push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers + +$code.=<<___; # (p19) +{ .mmi; cmp.ltu p6,p0=$inp,$len + add $inp=32,$inp + shr.u $Zhi=$Zhi,4 } //(p19) Z.hi>>=4 +{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] + xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo + add $Xip=9,$Xip };; // &Xi.lo +{ .mmi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] +(p6) ld1 $in=[$inp],-1 //[p16] *inp-- +(p6) extr.u $xi[1]=$Zlo,8,8 } //[p17] Xi[14] +{ .mmi; xor $Zhi=$Zhi,$Bhi //(p19) Z.hi^=Hshr4[nhi].hi +(p6) and $xi[0]=$Zlo,$mask0xff };; //[p16] Xi[15] +{ .mmi; st8 [$Xip]=$Zlo,-8 +(p6) xor $xi[0]=$xi[0],$in //[p17] xi=$xi[i]^inp[i] + shl $rem[1]=$rem[1],48 };; //(p19) rem_8bit[rem]<<48 +{ .mmi; +(p6) ld1 $in=[$inp],-1 //[p16] *inp-- + xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 +(p6) dep $Atbl=$xi[0],$Htbl,4,4 } //[p17] &Htable[nlo].lo +{ .mib; +(p6) and $xi[0]=-16,$xi[0] //[p17] nhi=xi&0xf0 +(p6) br.cond.dptk.many .LOOP };; + +{ .mib; st8 [$Xip]=$Zhi };; +{ .mib; $rum 1<<1 // return to little-endian + .restore sp + mov sp=prevsp + br.ret.sptk.many b0 };; +.endp gcm_ghash_4bit# +___ +$code.=<<___; +.align 128 +.type rem_4bit#,\@object +rem_4bit: + data8 0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48 + data8 0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48 + data8 0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48 + data8 0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48 +.size rem_4bit#,128 +.type rem_8bit#,\@object +rem_8bit: + data1 0x00,0x00, 0x01,0xC2, 0x03,0x84, 0x02,0x46, 0x07,0x08, 0x06,0xCA, 0x04,0x8C, 0x05,0x4E + data1 0x0E,0x10, 0x0F,0xD2, 0x0D,0x94, 0x0C,0x56, 0x09,0x18, 0x08,0xDA, 0x0A,0x9C, 0x0B,0x5E + data1 0x1C,0x20, 0x1D,0xE2, 0x1F,0xA4, 0x1E,0x66, 0x1B,0x28, 0x1A,0xEA, 0x18,0xAC, 0x19,0x6E + data1 0x12,0x30, 0x13,0xF2, 0x11,0xB4, 0x10,0x76, 0x15,0x38, 0x14,0xFA, 0x16,0xBC, 0x17,0x7E + data1 0x38,0x40, 0x39,0x82, 0x3B,0xC4, 0x3A,0x06, 0x3F,0x48, 0x3E,0x8A, 0x3C,0xCC, 0x3D,0x0E + data1 0x36,0x50, 0x37,0x92, 0x35,0xD4, 0x34,0x16, 0x31,0x58, 0x30,0x9A, 0x32,0xDC, 0x33,0x1E + data1 0x24,0x60, 0x25,0xA2, 0x27,0xE4, 0x26,0x26, 0x23,0x68, 0x22,0xAA, 0x20,0xEC, 0x21,0x2E + data1 0x2A,0x70, 0x2B,0xB2, 0x29,0xF4, 0x28,0x36, 0x2D,0x78, 0x2C,0xBA, 0x2E,0xFC, 0x2F,0x3E + data1 0x70,0x80, 0x71,0x42, 0x73,0x04, 0x72,0xC6, 0x77,0x88, 0x76,0x4A, 0x74,0x0C, 0x75,0xCE + data1 0x7E,0x90, 0x7F,0x52, 0x7D,0x14, 0x7C,0xD6, 0x79,0x98, 0x78,0x5A, 0x7A,0x1C, 0x7B,0xDE + data1 0x6C,0xA0, 0x6D,0x62, 0x6F,0x24, 0x6E,0xE6, 0x6B,0xA8, 0x6A,0x6A, 0x68,0x2C, 0x69,0xEE + data1 0x62,0xB0, 0x63,0x72, 0x61,0x34, 0x60,0xF6, 0x65,0xB8, 0x64,0x7A, 0x66,0x3C, 0x67,0xFE + data1 0x48,0xC0, 0x49,0x02, 0x4B,0x44, 0x4A,0x86, 0x4F,0xC8, 0x4E,0x0A, 0x4C,0x4C, 0x4D,0x8E + data1 0x46,0xD0, 0x47,0x12, 0x45,0x54, 0x44,0x96, 0x41,0xD8, 0x40,0x1A, 0x42,0x5C, 0x43,0x9E + data1 0x54,0xE0, 0x55,0x22, 0x57,0x64, 0x56,0xA6, 0x53,0xE8, 0x52,0x2A, 0x50,0x6C, 0x51,0xAE + data1 0x5A,0xF0, 0x5B,0x32, 0x59,0x74, 0x58,0xB6, 0x5D,0xF8, 0x5C,0x3A, 0x5E,0x7C, 0x5F,0xBE + data1 0xE1,0x00, 0xE0,0xC2, 0xE2,0x84, 0xE3,0x46, 0xE6,0x08, 0xE7,0xCA, 0xE5,0x8C, 0xE4,0x4E + data1 0xEF,0x10, 0xEE,0xD2, 0xEC,0x94, 0xED,0x56, 0xE8,0x18, 0xE9,0xDA, 0xEB,0x9C, 0xEA,0x5E + data1 0xFD,0x20, 0xFC,0xE2, 0xFE,0xA4, 0xFF,0x66, 0xFA,0x28, 0xFB,0xEA, 0xF9,0xAC, 0xF8,0x6E + data1 0xF3,0x30, 0xF2,0xF2, 0xF0,0xB4, 0xF1,0x76, 0xF4,0x38, 0xF5,0xFA, 0xF7,0xBC, 0xF6,0x7E + data1 0xD9,0x40, 0xD8,0x82, 0xDA,0xC4, 0xDB,0x06, 0xDE,0x48, 0xDF,0x8A, 0xDD,0xCC, 0xDC,0x0E + data1 0xD7,0x50, 0xD6,0x92, 0xD4,0xD4, 0xD5,0x16, 0xD0,0x58, 0xD1,0x9A, 0xD3,0xDC, 0xD2,0x1E + data1 0xC5,0x60, 0xC4,0xA2, 0xC6,0xE4, 0xC7,0x26, 0xC2,0x68, 0xC3,0xAA, 0xC1,0xEC, 0xC0,0x2E + data1 0xCB,0x70, 0xCA,0xB2, 0xC8,0xF4, 0xC9,0x36, 0xCC,0x78, 0xCD,0xBA, 0xCF,0xFC, 0xCE,0x3E + data1 0x91,0x80, 0x90,0x42, 0x92,0x04, 0x93,0xC6, 0x96,0x88, 0x97,0x4A, 0x95,0x0C, 0x94,0xCE + data1 0x9F,0x90, 0x9E,0x52, 0x9C,0x14, 0x9D,0xD6, 0x98,0x98, 0x99,0x5A, 0x9B,0x1C, 0x9A,0xDE + data1 0x8D,0xA0, 0x8C,0x62, 0x8E,0x24, 0x8F,0xE6, 0x8A,0xA8, 0x8B,0x6A, 0x89,0x2C, 0x88,0xEE + data1 0x83,0xB0, 0x82,0x72, 0x80,0x34, 0x81,0xF6, 0x84,0xB8, 0x85,0x7A, 0x87,0x3C, 0x86,0xFE + data1 0xA9,0xC0, 0xA8,0x02, 0xAA,0x44, 0xAB,0x86, 0xAE,0xC8, 0xAF,0x0A, 0xAD,0x4C, 0xAC,0x8E + data1 0xA7,0xD0, 0xA6,0x12, 0xA4,0x54, 0xA5,0x96, 0xA0,0xD8, 0xA1,0x1A, 0xA3,0x5C, 0xA2,0x9E + data1 0xB5,0xE0, 0xB4,0x22, 0xB6,0x64, 0xB7,0xA6, 0xB2,0xE8, 0xB3,0x2A, 0xB1,0x6C, 0xB0,0xAE + data1 0xBB,0xF0, 0xBA,0x32, 0xB8,0x74, 0xB9,0xB6, 0xBC,0xF8, 0xBD,0x3A, 0xBF,0x7C, 0xBE,0xBE +.size rem_8bit#,512 +stringz "GHASH for IA64, CRYPTOGAMS by " +___ + +$code =~ s/mux1(\s+)\S+\@rev/nop.i$1 0x0/gm if ($big_endian); +$code =~ s/\`([^\`]*)\`/eval $1/gem; + +print $code; +close STDOUT; diff --git a/app/openssl/crypto/modes/asm/ghash-parisc.pl b/app/openssl/crypto/modes/asm/ghash-parisc.pl new file mode 100644 index 00000000..d5ad96b4 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-parisc.pl @@ -0,0 +1,731 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# April 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC +# it processes one byte in 19.6 cycles, which is more than twice as +# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for +# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per +# processed byte. This is ~2.2x faster than 64-bit code generated by +# vendor compiler (which used to be very hard to beat:-). +# +# Special thanks to polarhome.com for providing HP-UX account. + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; + $NREGS =6; +} else { + $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; + $NREGS =11; +} + +$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker + # [+ argument transfer] + +################# volatile registers +$Xi="%r26"; # argument block +$Htbl="%r25"; +$inp="%r24"; +$len="%r23"; +$Hhh=$Htbl; # variables +$Hll="%r22"; +$Zhh="%r21"; +$Zll="%r20"; +$cnt="%r19"; +$rem_4bit="%r28"; +$rem="%r29"; +$mask0xf0="%r31"; + +################# preserved registers +$Thh="%r1"; +$Tll="%r2"; +$nlo="%r3"; +$nhi="%r4"; +$byte="%r5"; +if ($SIZE_T==4) { + $Zhl="%r6"; + $Zlh="%r7"; + $Hhl="%r8"; + $Hlh="%r9"; + $Thl="%r10"; + $Tlh="%r11"; +} +$rem2="%r6"; # used in PA-RISC 2.0 code + +$code.=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 64 +gcm_gmult_4bit + .PROC + .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) +___ +$code.=<<___ if ($SIZE_T==4); + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) +___ +$code.=<<___; + blr %r0,$rem_4bit + ldi 3,$rem +L\$pic_gmult + andcm $rem_4bit,$rem,$rem_4bit + addl $inp,$len,$len + ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit + ldi 0xf0,$mask0xf0 +___ +$code.=<<___ if ($SIZE_T==4); + ldi 31,$rem + mtctl $rem,%cr11 + extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 + b L\$parisc1_gmult + nop +___ + +$code.=<<___; + ldb 15($Xi),$nlo + ldo 8($Htbl),$Hll + + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + + ldd $nlo($Hll),$Zll + ldd $nlo($Hhh),$Zhh + + depd,z $Zll,60,4,$rem + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldb 14($Xi),$nlo + + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + b L\$oop_gmult_pa2 + ldi 13,$cnt + + .ALIGN 8 +L\$oop_gmult_pa2 + xor $rem,$Zhh,$Zhh ; moved here to work around gas bug + depd,z $Zll,60,4,$rem + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nlo($Hll),$Tll + ldd $nlo($Hhh),$Thh + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + + xor $rem,$Zhh,$Zhh + depd,z $Zll,60,4,$rem + ldbx $cnt($Xi),$nlo + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + ldd $rem($rem_4bit),$rem + + xor $Tll,$Zll,$Zll + addib,uv -1,$cnt,L\$oop_gmult_pa2 + xor $Thh,$Zhh,$Zhh + + xor $rem,$Zhh,$Zhh + depd,z $Zll,60,4,$rem + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nlo($Hll),$Tll + ldd $nlo($Hhh),$Thh + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + + xor $rem,$Zhh,$Zhh + depd,z $Zll,60,4,$rem + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + + xor $rem,$Zhh,$Zhh + std $Zll,8($Xi) + std $Zhh,0($Xi) +___ + +$code.=<<___ if ($SIZE_T==4); + b L\$done_gmult + nop + +L\$parisc1_gmult + ldb 15($Xi),$nlo + ldo 12($Htbl),$Hll + ldo 8($Htbl),$Hlh + ldo 4($Htbl),$Hhl + + and $mask0xf0,$nlo,$nhi + zdep $nlo,27,4,$nlo + + ldwx $nlo($Hll),$Zll + ldwx $nlo($Hlh),$Zlh + ldwx $nlo($Hhl),$Zhl + ldwx $nlo($Hhh),$Zhh + zdep $Zll,28,4,$rem + ldb 14($Xi),$nlo + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhl,$Zlh,4,$Zlh + ldwx $nhi($Hlh),$Tlh + shrpw $Zhh,$Zhl,4,$Zhl + ldwx $nhi($Hhl),$Thl + extru $Zhh,27,28,$Zhh + ldwx $nhi($Hhh),$Thh + xor $rem,$Zhh,$Zhh + and $mask0xf0,$nlo,$nhi + zdep $nlo,27,4,$nlo + + xor $Tll,$Zll,$Zll + ldwx $nlo($Hll),$Tll + xor $Tlh,$Zlh,$Zlh + ldwx $nlo($Hlh),$Tlh + xor $Thl,$Zhl,$Zhl + b L\$oop_gmult_pa1 + ldi 13,$cnt + + .ALIGN 8 +L\$oop_gmult_pa1 + zdep $Zll,28,4,$rem + ldwx $nlo($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nlo($Hhh),$Thh + shrpw $Zhl,$Zlh,4,$Zlh + ldbx $cnt($Xi),$nlo + xor $Tll,$Zll,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhh,$Zhl,4,$Zhl + xor $Tlh,$Zlh,$Zlh + ldwx $nhi($Hlh),$Tlh + extru $Zhh,27,28,$Zhh + xor $Thl,$Zhl,$Zhl + ldwx $nhi($Hhl),$Thl + xor $rem,$Zhh,$Zhh + zdep $Zll,28,4,$rem + xor $Thh,$Zhh,$Zhh + ldwx $nhi($Hhh),$Thh + shrpw $Zlh,$Zll,4,$Zll + ldwx $rem($rem_4bit),$rem + shrpw $Zhl,$Zlh,4,$Zlh + shrpw $Zhh,$Zhl,4,$Zhl + and $mask0xf0,$nlo,$nhi + extru $Zhh,27,28,$Zhh + zdep $nlo,27,4,$nlo + xor $Tll,$Zll,$Zll + ldwx $nlo($Hll),$Tll + xor $Tlh,$Zlh,$Zlh + ldwx $nlo($Hlh),$Tlh + xor $rem,$Zhh,$Zhh + addib,uv -1,$cnt,L\$oop_gmult_pa1 + xor $Thl,$Zhl,$Zhl + + zdep $Zll,28,4,$rem + ldwx $nlo($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nlo($Hhh),$Thh + shrpw $Zhl,$Zlh,4,$Zlh + xor $Tll,$Zll,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhh,$Zhl,4,$Zhl + xor $Tlh,$Zlh,$Zlh + ldwx $nhi($Hlh),$Tlh + extru $Zhh,27,28,$Zhh + xor $rem,$Zhh,$Zhh + xor $Thl,$Zhl,$Zhl + ldwx $nhi($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $nhi($Hhh),$Thh + zdep $Zll,28,4,$rem + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + shrpw $Zhl,$Zlh,4,$Zlh + shrpw $Zhh,$Zhl,4,$Zhl + extru $Zhh,27,28,$Zhh + xor $Tll,$Zll,$Zll + xor $Tlh,$Zlh,$Zlh + xor $rem,$Zhh,$Zhh + stw $Zll,12($Xi) + xor $Thl,$Zhl,$Zhl + stw $Zlh,8($Xi) + xor $Thh,$Zhh,$Zhh + stw $Zhl,4($Xi) + stw $Zhh,0($Xi) +___ +$code.=<<___; +L\$done_gmult + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 +___ +$code.=<<___ if ($SIZE_T==4); + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 +___ +$code.=<<___; + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + + .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR + .ALIGN 64 +gcm_ghash_4bit + .PROC + .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) +___ +$code.=<<___ if ($SIZE_T==4); + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) +___ +$code.=<<___; + blr %r0,$rem_4bit + ldi 3,$rem +L\$pic_ghash + andcm $rem_4bit,$rem,$rem_4bit + addl $inp,$len,$len + ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit + ldi 0xf0,$mask0xf0 +___ +$code.=<<___ if ($SIZE_T==4); + ldi 31,$rem + mtctl $rem,%cr11 + extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 + b L\$parisc1_ghash + nop +___ + +$code.=<<___; + ldb 15($Xi),$nlo + ldo 8($Htbl),$Hll + +L\$outer_ghash_pa2 + ldb 15($inp),$nhi + xor $nhi,$nlo,$nlo + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + + ldd $nlo($Hll),$Zll + ldd $nlo($Hhh),$Zhh + + depd,z $Zll,60,4,$rem + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldb 14($Xi),$nlo + ldb 14($inp),$byte + + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + xor $byte,$nlo,$nlo + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + b L\$oop_ghash_pa2 + ldi 13,$cnt + + .ALIGN 8 +L\$oop_ghash_pa2 + xor $rem,$Zhh,$Zhh ; moved here to work around gas bug + depd,z $Zll,60,4,$rem2 + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nlo($Hll),$Tll + ldd $nlo($Hhh),$Thh + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldbx $cnt($Xi),$nlo + ldbx $cnt($inp),$byte + + depd,z $Zll,60,4,$rem + shrpd $Zhh,$Zll,4,$Zll + ldd $rem2($rem_4bit),$rem2 + + xor $rem2,$Zhh,$Zhh + xor $byte,$nlo,$nlo + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + + and $mask0xf0,$nlo,$nhi + depd,z $nlo,59,4,$nlo + + extrd,u $Zhh,59,60,$Zhh + xor $Tll,$Zll,$Zll + + ldd $rem($rem_4bit),$rem + addib,uv -1,$cnt,L\$oop_ghash_pa2 + xor $Thh,$Zhh,$Zhh + + xor $rem,$Zhh,$Zhh + depd,z $Zll,60,4,$rem2 + + shrpd $Zhh,$Zll,4,$Zll + extrd,u $Zhh,59,60,$Zhh + ldd $nlo($Hll),$Tll + ldd $nlo($Hhh),$Thh + + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + + depd,z $Zll,60,4,$rem + shrpd $Zhh,$Zll,4,$Zll + ldd $rem2($rem_4bit),$rem2 + + xor $rem2,$Zhh,$Zhh + ldd $nhi($Hll),$Tll + ldd $nhi($Hhh),$Thh + + extrd,u $Zhh,59,60,$Zhh + xor $Tll,$Zll,$Zll + xor $Thh,$Zhh,$Zhh + ldd $rem($rem_4bit),$rem + + xor $rem,$Zhh,$Zhh + std $Zll,8($Xi) + ldo 16($inp),$inp + std $Zhh,0($Xi) + cmpb,*<> $inp,$len,L\$outer_ghash_pa2 + copy $Zll,$nlo +___ + +$code.=<<___ if ($SIZE_T==4); + b L\$done_ghash + nop + +L\$parisc1_ghash + ldb 15($Xi),$nlo + ldo 12($Htbl),$Hll + ldo 8($Htbl),$Hlh + ldo 4($Htbl),$Hhl + +L\$outer_ghash_pa1 + ldb 15($inp),$byte + xor $byte,$nlo,$nlo + and $mask0xf0,$nlo,$nhi + zdep $nlo,27,4,$nlo + + ldwx $nlo($Hll),$Zll + ldwx $nlo($Hlh),$Zlh + ldwx $nlo($Hhl),$Zhl + ldwx $nlo($Hhh),$Zhh + zdep $Zll,28,4,$rem + ldb 14($Xi),$nlo + ldb 14($inp),$byte + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhl,$Zlh,4,$Zlh + ldwx $nhi($Hlh),$Tlh + shrpw $Zhh,$Zhl,4,$Zhl + ldwx $nhi($Hhl),$Thl + extru $Zhh,27,28,$Zhh + ldwx $nhi($Hhh),$Thh + xor $byte,$nlo,$nlo + xor $rem,$Zhh,$Zhh + and $mask0xf0,$nlo,$nhi + zdep $nlo,27,4,$nlo + + xor $Tll,$Zll,$Zll + ldwx $nlo($Hll),$Tll + xor $Tlh,$Zlh,$Zlh + ldwx $nlo($Hlh),$Tlh + xor $Thl,$Zhl,$Zhl + b L\$oop_ghash_pa1 + ldi 13,$cnt + + .ALIGN 8 +L\$oop_ghash_pa1 + zdep $Zll,28,4,$rem + ldwx $nlo($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nlo($Hhh),$Thh + shrpw $Zhl,$Zlh,4,$Zlh + ldbx $cnt($Xi),$nlo + xor $Tll,$Zll,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhh,$Zhl,4,$Zhl + ldbx $cnt($inp),$byte + xor $Tlh,$Zlh,$Zlh + ldwx $nhi($Hlh),$Tlh + extru $Zhh,27,28,$Zhh + xor $Thl,$Zhl,$Zhl + ldwx $nhi($Hhl),$Thl + xor $rem,$Zhh,$Zhh + zdep $Zll,28,4,$rem + xor $Thh,$Zhh,$Zhh + ldwx $nhi($Hhh),$Thh + shrpw $Zlh,$Zll,4,$Zll + ldwx $rem($rem_4bit),$rem + shrpw $Zhl,$Zlh,4,$Zlh + xor $byte,$nlo,$nlo + shrpw $Zhh,$Zhl,4,$Zhl + and $mask0xf0,$nlo,$nhi + extru $Zhh,27,28,$Zhh + zdep $nlo,27,4,$nlo + xor $Tll,$Zll,$Zll + ldwx $nlo($Hll),$Tll + xor $Tlh,$Zlh,$Zlh + ldwx $nlo($Hlh),$Tlh + xor $rem,$Zhh,$Zhh + addib,uv -1,$cnt,L\$oop_ghash_pa1 + xor $Thl,$Zhl,$Zhl + + zdep $Zll,28,4,$rem + ldwx $nlo($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + ldwx $nlo($Hhh),$Thh + shrpw $Zhl,$Zlh,4,$Zlh + xor $Tll,$Zll,$Zll + ldwx $nhi($Hll),$Tll + shrpw $Zhh,$Zhl,4,$Zhl + xor $Tlh,$Zlh,$Zlh + ldwx $nhi($Hlh),$Tlh + extru $Zhh,27,28,$Zhh + xor $rem,$Zhh,$Zhh + xor $Thl,$Zhl,$Zhl + ldwx $nhi($Hhl),$Thl + xor $Thh,$Zhh,$Zhh + ldwx $nhi($Hhh),$Thh + zdep $Zll,28,4,$rem + ldwx $rem($rem_4bit),$rem + shrpw $Zlh,$Zll,4,$Zll + shrpw $Zhl,$Zlh,4,$Zlh + shrpw $Zhh,$Zhl,4,$Zhl + extru $Zhh,27,28,$Zhh + xor $Tll,$Zll,$Zll + xor $Tlh,$Zlh,$Zlh + xor $rem,$Zhh,$Zhh + stw $Zll,12($Xi) + xor $Thl,$Zhl,$Zhl + stw $Zlh,8($Xi) + xor $Thh,$Zhh,$Zhh + stw $Zhl,4($Xi) + ldo 16($inp),$inp + stw $Zhh,0($Xi) + comb,<> $inp,$len,L\$outer_ghash_pa1 + copy $Zll,$nlo +___ +$code.=<<___; +L\$done_ghash + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 +___ +$code.=<<___ if ($SIZE_T==4); + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 +___ +$code.=<<___; + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + + .ALIGN 64 +L\$rem_4bit + .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 + .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 + .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 + .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 + .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by " + .ALIGN 64 +___ + +# Explicitly encode PA-RISC 2.0 instructions used in this module, so +# that it can be compiled with .LEVEL 1.0. It should be noted that I +# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 +# directive... + +my $ldd = sub { + my ($mod,$args) = @_; + my $orig = "ldd$mod\t$args"; + + if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4 + { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3; + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5 + { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3; + $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset + $opcode|=(1<<5) if ($mod =~ /^,m/); + $opcode|=(1<<13) if ($mod =~ /^,mb/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $std = sub { + my ($mod,$args) = @_; + my $orig = "std$mod\t$args"; + + if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices + { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $extrd = sub { + my ($mod,$args) = @_; + my $orig = "extrd$mod\t$args"; + + # I only have ",u" completer, it's implicitly encoded... + if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 + { my $opcode=(0x36<<26)|($1<<21)|($4<<16); + my $len=32-$3; + $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos + $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 + { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); + my $len=32-$2; + $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len + $opcode |= (1<<13) if ($mod =~ /,\**=/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $shrpd = sub { + my ($mod,$args) = @_; + my $orig = "shrpd$mod\t$args"; + + if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 + { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; + my $cpos=63-$3; + $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11 + { sprintf "\t.WORD\t0x%08x\t; %s", + (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; + } + else { "\t".$orig; } +}; + +my $depd = sub { + my ($mod,$args) = @_; + my $orig = "depd$mod\t$args"; + + # I only have ",z" completer, it's impicitly encoded... + if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16 + { my $opcode=(0x3c<<26)|($4<<21)|($1<<16); + my $cpos=63-$2; + my $len=32-$3; + $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos + $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +sub assemble { + my ($mnemonic,$mod,$args)=@_; + my $opcode = eval("\$$mnemonic"); + + ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + if ($SIZE_T==4) { + s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e; + s/cmpb,\*/comb,/; + s/,\*/,/; + } + s/\bbv\b/bve/ if ($SIZE_T==8); + print $_,"\n"; +} + +close STDOUT; diff --git a/app/openssl/crypto/modes/asm/ghash-s390x.pl b/app/openssl/crypto/modes/asm/ghash-s390x.pl new file mode 100644 index 00000000..6a40d5d8 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-s390x.pl @@ -0,0 +1,262 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# September 2010. +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+128 bytes shared table]. Performance +# was measured to be ~18 cycles per processed byte on z10, which is +# almost 40% better than gcc-generated code. It should be noted that +# 18 cycles is worse result than expected: loop is scheduled for 12 +# and the result should be close to 12. In the lack of instruction- +# level profiling data it's impossible to tell why... + +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 2.8x better than 32-bit code generated by gcc 4.3. + +# March 2011. +# +# Support for hardware KIMD-GHASH is verified to produce correct +# result and therefore is engaged. On z196 it was measured to process +# 8KB buffer ~7 faster than software implementation. It's not as +# impressive for smaller buffer sizes and for smallest 16-bytes buffer +# it's actually almost 2 times slower. Which is the reason why +# KIMD-GHASH is not used in gcm_gmult_4bit. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$softonly=0; + +$Zhi="%r0"; +$Zlo="%r1"; + +$Xi="%r2"; # argument block +$Htbl="%r3"; +$inp="%r4"; +$len="%r5"; + +$rem0="%r6"; # variables +$rem1="%r7"; +$nlo="%r8"; +$nhi="%r9"; +$xi="%r10"; +$cnt="%r11"; +$tmp="%r12"; +$x78="%r13"; +$rem_4bit="%r14"; + +$sp="%r15"; + +$code.=<<___; +.text + +.globl gcm_gmult_4bit +.align 32 +gcm_gmult_4bit: +___ +$code.=<<___ if(!$softonly && 0); # hardware is slow for single block... + larl %r1,OPENSSL_s390xcap_P + lg %r0,0(%r1) + tmhl %r0,0x4000 # check for message-security-assist + jz .Lsoft_gmult + lghi %r0,0 + la %r1,16($sp) + .long 0xb93e0004 # kimd %r0,%r4 + lg %r1,24($sp) + tmhh %r1,0x4000 # check for function 65 + jz .Lsoft_gmult + stg %r0,16($sp) # arrange 16 bytes of zero input + stg %r0,24($sp) + lghi %r0,65 # function 65 + la %r1,0($Xi) # H lies right after Xi in gcm128_context + la $inp,16($sp) + lghi $len,16 + .long 0xb93e0004 # kimd %r0,$inp + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 32 +.Lsoft_gmult: +___ +$code.=<<___; + stm${g} %r6,%r14,6*$SIZE_T($sp) + + aghi $Xi,-1 + lghi $len,1 + lghi $x78,`0xf<<3` + larl $rem_4bit,rem_4bit + + lg $Zlo,8+1($Xi) # Xi + j .Lgmult_shortcut +.type gcm_gmult_4bit,\@function +.size gcm_gmult_4bit,(.-gcm_gmult_4bit) + +.globl gcm_ghash_4bit +.align 32 +gcm_ghash_4bit: +___ +$code.=<<___ if(!$softonly); + larl %r1,OPENSSL_s390xcap_P + lg %r0,0(%r1) + tmhl %r0,0x4000 # check for message-security-assist + jz .Lsoft_ghash + lghi %r0,0 + la %r1,16($sp) + .long 0xb93e0004 # kimd %r0,%r4 + lg %r1,24($sp) + tmhh %r1,0x4000 # check for function 65 + jz .Lsoft_ghash + lghi %r0,65 # function 65 + la %r1,0($Xi) # H lies right after Xi in gcm128_context + .long 0xb93e0004 # kimd %r0,$inp + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 32 +.Lsoft_ghash: +___ +$code.=<<___ if ($flavour =~ /3[12]/); + llgfr $len,$len +___ +$code.=<<___; + stm${g} %r6,%r14,6*$SIZE_T($sp) + + aghi $Xi,-1 + srlg $len,$len,4 + lghi $x78,`0xf<<3` + larl $rem_4bit,rem_4bit + + lg $Zlo,8+1($Xi) # Xi + lg $Zhi,0+1($Xi) + lghi $tmp,0 +.Louter: + xg $Zhi,0($inp) # Xi ^= inp + xg $Zlo,8($inp) + xgr $Zhi,$tmp + stg $Zlo,8+1($Xi) + stg $Zhi,0+1($Xi) + +.Lgmult_shortcut: + lghi $tmp,0xf0 + sllg $nlo,$Zlo,4 + srlg $xi,$Zlo,8 # extract second byte + ngr $nlo,$tmp + lgr $nhi,$Zlo + lghi $cnt,14 + ngr $nhi,$tmp + + lg $Zlo,8($nlo,$Htbl) + lg $Zhi,0($nlo,$Htbl) + + sllg $nlo,$xi,4 + sllg $rem0,$Zlo,3 + ngr $nlo,$tmp + ngr $rem0,$x78 + ngr $xi,$tmp + + sllg $tmp,$Zhi,60 + srlg $Zlo,$Zlo,4 + srlg $Zhi,$Zhi,4 + xg $Zlo,8($nhi,$Htbl) + xg $Zhi,0($nhi,$Htbl) + lgr $nhi,$xi + sllg $rem1,$Zlo,3 + xgr $Zlo,$tmp + ngr $rem1,$x78 + j .Lghash_inner +.align 16 +.Lghash_inner: + srlg $Zlo,$Zlo,4 + sllg $tmp,$Zhi,60 + xg $Zlo,8($nlo,$Htbl) + srlg $Zhi,$Zhi,4 + llgc $xi,0($cnt,$Xi) + xg $Zhi,0($nlo,$Htbl) + sllg $nlo,$xi,4 + xg $Zhi,0($rem0,$rem_4bit) + nill $nlo,0xf0 + sllg $rem0,$Zlo,3 + xgr $Zlo,$tmp + ngr $rem0,$x78 + nill $xi,0xf0 + + sllg $tmp,$Zhi,60 + srlg $Zlo,$Zlo,4 + srlg $Zhi,$Zhi,4 + xg $Zlo,8($nhi,$Htbl) + xg $Zhi,0($nhi,$Htbl) + lgr $nhi,$xi + xg $Zhi,0($rem1,$rem_4bit) + sllg $rem1,$Zlo,3 + xgr $Zlo,$tmp + ngr $rem1,$x78 + brct $cnt,.Lghash_inner + + sllg $tmp,$Zhi,60 + srlg $Zlo,$Zlo,4 + srlg $Zhi,$Zhi,4 + xg $Zlo,8($nlo,$Htbl) + xg $Zhi,0($nlo,$Htbl) + sllg $xi,$Zlo,3 + xg $Zhi,0($rem0,$rem_4bit) + xgr $Zlo,$tmp + ngr $xi,$x78 + + sllg $tmp,$Zhi,60 + srlg $Zlo,$Zlo,4 + srlg $Zhi,$Zhi,4 + xg $Zlo,8($nhi,$Htbl) + xg $Zhi,0($nhi,$Htbl) + xgr $Zlo,$tmp + xg $Zhi,0($rem1,$rem_4bit) + + lg $tmp,0($xi,$rem_4bit) + la $inp,16($inp) + sllg $tmp,$tmp,4 # correct last rem_4bit[rem] + brctg $len,.Louter + + xgr $Zhi,$tmp + stg $Zlo,8+1($Xi) + stg $Zhi,0+1($Xi) + lm${g} %r6,%r14,6*$SIZE_T($sp) + br %r14 +.type gcm_ghash_4bit,\@function +.size gcm_ghash_4bit,(.-gcm_ghash_4bit) + +.align 64 +rem_4bit: + .long `0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0 + .long `0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0 + .long `0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0 + .long `0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0 +.type rem_4bit,\@object +.size rem_4bit,(.-rem_4bit) +.string "GHASH for s390x, CRYPTOGAMS by " +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/modes/asm/ghash-sparcv9.pl b/app/openssl/crypto/modes/asm/ghash-sparcv9.pl new file mode 100644 index 00000000..70e7b044 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-sparcv9.pl @@ -0,0 +1,330 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# March 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+128 bytes shared table]. Performance +# results are for streamed GHASH subroutine on UltraSPARC pre-Tx CPU +# and are expressed in cycles per processed byte, less is better: +# +# gcc 3.3.x cc 5.2 this assembler +# +# 32-bit build 81.4 43.3 12.6 (+546%/+244%) +# 64-bit build 20.2 21.2 12.6 (+60%/+68%) +# +# Here is data collected on UltraSPARC T1 system running Linux: +# +# gcc 4.4.1 this assembler +# +# 32-bit build 566 50 (+1000%) +# 64-bit build 56 50 (+12%) +# +# I don't quite understand why difference between 32-bit and 64-bit +# compiler-generated code is so big. Compilers *were* instructed to +# generate code for UltraSPARC and should have used 64-bit registers +# for Z vector (see C code) even in 32-bit build... Oh well, it only +# means more impressive improvement coefficients for this assembler +# module;-) Loops are aggressively modulo-scheduled in respect to +# references to input data and Z.hi updates to achieve 12 cycles +# timing. To anchor to something else, sha1-sparcv9.pl spends 11.6 +# cycles to process one byte on UltraSPARC pre-Tx CPU and ~24 on T1. + +$bits=32; +for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } +if ($bits==64) { $bias=2047; $frame=192; } +else { $bias=0; $frame=112; } + +$output=shift; +open STDOUT,">$output"; + +$Zhi="%o0"; # 64-bit values +$Zlo="%o1"; +$Thi="%o2"; +$Tlo="%o3"; +$rem="%o4"; +$tmp="%o5"; + +$nhi="%l0"; # small values and pointers +$nlo="%l1"; +$xi0="%l2"; +$xi1="%l3"; +$rem_4bit="%l4"; +$remi="%l5"; +$Htblo="%l6"; +$cnt="%l7"; + +$Xi="%i0"; # input argument block +$Htbl="%i1"; +$inp="%i2"; +$len="%i3"; + +$code.=<<___; +.section ".text",#alloc,#execinstr + +.align 64 +rem_4bit: + .long `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 + .long `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 + .long `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 + .long `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 +.type rem_4bit,#object +.size rem_4bit,(.-rem_4bit) + +.globl gcm_ghash_4bit +.align 32 +gcm_ghash_4bit: + save %sp,-$frame,%sp + ldub [$inp+15],$nlo + ldub [$Xi+15],$xi0 + ldub [$Xi+14],$xi1 + add $len,$inp,$len + add $Htbl,8,$Htblo + +1: call .+8 + add %o7,rem_4bit-1b,$rem_4bit + +.Louter: + xor $xi0,$nlo,$nlo + and $nlo,0xf0,$nhi + and $nlo,0x0f,$nlo + sll $nlo,4,$nlo + ldx [$Htblo+$nlo],$Zlo + ldx [$Htbl+$nlo],$Zhi + + ldub [$inp+14],$nlo + + ldx [$Htblo+$nhi],$Tlo + and $Zlo,0xf,$remi + ldx [$Htbl+$nhi],$Thi + sll $remi,3,$remi + ldx [$rem_4bit+$remi],$rem + srlx $Zlo,4,$Zlo + mov 13,$cnt + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + + xor $xi1,$nlo,$nlo + and $Zlo,0xf,$remi + and $nlo,0xf0,$nhi + and $nlo,0x0f,$nlo + ba .Lghash_inner + sll $nlo,4,$nlo +.align 32 +.Lghash_inner: + ldx [$Htblo+$nlo],$Tlo + sll $remi,3,$remi + xor $Thi,$Zhi,$Zhi + ldx [$Htbl+$nlo],$Thi + srlx $Zlo,4,$Zlo + xor $rem,$Zhi,$Zhi + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + ldub [$inp+$cnt],$nlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + ldub [$Xi+$cnt],$xi1 + xor $Thi,$Zhi,$Zhi + and $Zlo,0xf,$remi + + ldx [$Htblo+$nhi],$Tlo + sll $remi,3,$remi + xor $rem,$Zhi,$Zhi + ldx [$Htbl+$nhi],$Thi + srlx $Zlo,4,$Zlo + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $xi1,$nlo,$nlo + srlx $Zhi,4,$Zhi + and $nlo,0xf0,$nhi + addcc $cnt,-1,$cnt + xor $Zlo,$tmp,$Zlo + and $nlo,0x0f,$nlo + xor $Tlo,$Zlo,$Zlo + sll $nlo,4,$nlo + blu .Lghash_inner + and $Zlo,0xf,$remi + + ldx [$Htblo+$nlo],$Tlo + sll $remi,3,$remi + xor $Thi,$Zhi,$Zhi + ldx [$Htbl+$nlo],$Thi + srlx $Zlo,4,$Zlo + xor $rem,$Zhi,$Zhi + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + + add $inp,16,$inp + cmp $inp,$len + be,pn `$bits==64?"%xcc":"%icc"`,.Ldone + and $Zlo,0xf,$remi + + ldx [$Htblo+$nhi],$Tlo + sll $remi,3,$remi + xor $rem,$Zhi,$Zhi + ldx [$Htbl+$nhi],$Thi + srlx $Zlo,4,$Zlo + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + ldub [$inp+15],$nlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + stx $Zlo,[$Xi+8] + xor $rem,$Zhi,$Zhi + stx $Zhi,[$Xi] + srl $Zlo,8,$xi1 + and $Zlo,0xff,$xi0 + ba .Louter + and $xi1,0xff,$xi1 +.align 32 +.Ldone: + ldx [$Htblo+$nhi],$Tlo + sll $remi,3,$remi + xor $rem,$Zhi,$Zhi + ldx [$Htbl+$nhi],$Thi + srlx $Zlo,4,$Zlo + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + stx $Zlo,[$Xi+8] + xor $rem,$Zhi,$Zhi + stx $Zhi,[$Xi] + + ret + restore +.type gcm_ghash_4bit,#function +.size gcm_ghash_4bit,(.-gcm_ghash_4bit) +___ + +undef $inp; +undef $len; + +$code.=<<___; +.globl gcm_gmult_4bit +.align 32 +gcm_gmult_4bit: + save %sp,-$frame,%sp + ldub [$Xi+15],$nlo + add $Htbl,8,$Htblo + +1: call .+8 + add %o7,rem_4bit-1b,$rem_4bit + + and $nlo,0xf0,$nhi + and $nlo,0x0f,$nlo + sll $nlo,4,$nlo + ldx [$Htblo+$nlo],$Zlo + ldx [$Htbl+$nlo],$Zhi + + ldub [$Xi+14],$nlo + + ldx [$Htblo+$nhi],$Tlo + and $Zlo,0xf,$remi + ldx [$Htbl+$nhi],$Thi + sll $remi,3,$remi + ldx [$rem_4bit+$remi],$rem + srlx $Zlo,4,$Zlo + mov 13,$cnt + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + + and $Zlo,0xf,$remi + and $nlo,0xf0,$nhi + and $nlo,0x0f,$nlo + ba .Lgmult_inner + sll $nlo,4,$nlo +.align 32 +.Lgmult_inner: + ldx [$Htblo+$nlo],$Tlo + sll $remi,3,$remi + xor $Thi,$Zhi,$Zhi + ldx [$Htbl+$nlo],$Thi + srlx $Zlo,4,$Zlo + xor $rem,$Zhi,$Zhi + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + ldub [$Xi+$cnt],$nlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + and $Zlo,0xf,$remi + + ldx [$Htblo+$nhi],$Tlo + sll $remi,3,$remi + xor $rem,$Zhi,$Zhi + ldx [$Htbl+$nhi],$Thi + srlx $Zlo,4,$Zlo + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + srlx $Zhi,4,$Zhi + and $nlo,0xf0,$nhi + addcc $cnt,-1,$cnt + xor $Zlo,$tmp,$Zlo + and $nlo,0x0f,$nlo + xor $Tlo,$Zlo,$Zlo + sll $nlo,4,$nlo + blu .Lgmult_inner + and $Zlo,0xf,$remi + + ldx [$Htblo+$nlo],$Tlo + sll $remi,3,$remi + xor $Thi,$Zhi,$Zhi + ldx [$Htbl+$nlo],$Thi + srlx $Zlo,4,$Zlo + xor $rem,$Zhi,$Zhi + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + and $Zlo,0xf,$remi + + ldx [$Htblo+$nhi],$Tlo + sll $remi,3,$remi + xor $rem,$Zhi,$Zhi + ldx [$Htbl+$nhi],$Thi + srlx $Zlo,4,$Zlo + ldx [$rem_4bit+$remi],$rem + sllx $Zhi,60,$tmp + xor $Tlo,$Zlo,$Zlo + srlx $Zhi,4,$Zhi + xor $Zlo,$tmp,$Zlo + xor $Thi,$Zhi,$Zhi + stx $Zlo,[$Xi+8] + xor $rem,$Zhi,$Zhi + stx $Zhi,[$Xi] + + ret + restore +.type gcm_gmult_4bit,#function +.size gcm_gmult_4bit,(.-gcm_gmult_4bit) +.asciz "GHASH for SPARCv9, CRYPTOGAMS by " +.align 4 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/modes/asm/ghash-x86.S b/app/openssl/crypto/modes/asm/ghash-x86.S new file mode 100644 index 00000000..50473201 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-x86.S @@ -0,0 +1,1269 @@ +.file "ghash-x86.s" +.text +.globl gcm_gmult_4bit_x86 +.type gcm_gmult_4bit_x86,@function +.align 16 +gcm_gmult_4bit_x86: +.L_gcm_gmult_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%edi + movl 108(%esp),%esi + movl (%edi),%ebp + movl 4(%edi),%edx + movl 8(%edi),%ecx + movl 12(%edi),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) + movl %ebp,(%esp) + movl %edx,4(%esp) + movl %ecx,8(%esp) + movl %ebx,12(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L000x86_loop +.align 16 +.L000x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L001x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L000x86_loop +.align 16 +.L001x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin +.globl gcm_ghash_4bit_x86 +.type gcm_ghash_4bit_x86,@function +.align 16 +gcm_ghash_4bit_x86: +.L_gcm_ghash_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%ebx + movl 108(%esp),%esi + movl 112(%esp),%edi + movl 116(%esp),%ecx + addl %edi,%ecx + movl %ecx,116(%esp) + movl (%ebx),%ebp + movl 4(%ebx),%edx + movl 8(%ebx),%ecx + movl 12(%ebx),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) +.align 16 +.L002x86_outer_loop: + xorl 12(%edi),%ebx + xorl 8(%edi),%ecx + xorl 4(%edi),%edx + xorl (%edi),%ebp + movl %ebx,12(%esp) + movl %ecx,8(%esp) + movl %edx,4(%esp) + movl %ebp,(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L003x86_loop +.align 16 +.L003x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L004x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L003x86_loop +.align 16 +.L004x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 112(%esp),%edi + leal 16(%edi),%edi + cmpl 116(%esp),%edi + movl %edi,112(%esp) + jb .L002x86_outer_loop + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin +.globl gcm_gmult_4bit_mmx +.type gcm_gmult_4bit_mmx,@function +.align 16 +gcm_gmult_4bit_mmx: +.L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call .L005pic_point +.L005pic_point: + popl %eax + leal .Lrem_4bit-.L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx + xorl %ecx,%ecx + movl %ebx,%edx + movb %dl,%cl + movl $14,%ebp + shlb $4,%cl + andl $240,%edx + movq 8(%esi,%ecx,1),%mm0 + movq (%esi,%ecx,1),%mm1 + movd %mm0,%ebx + jmp .L006mmx_loop +.align 16 +.L006mmx_loop: + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + movb (%edi,%ebp,1),%cl + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + decl %ebp + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + movl %ecx,%edx + pxor %mm2,%mm0 + js .L007mmx_break + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + jmp .L006mmx_loop +.align 16 +.L007mmx_break: + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + pxor %mm2,%mm0 + psrlq $32,%mm0 + movd %mm1,%edx + psrlq $32,%mm1 + movd %mm0,%ecx + movd %mm1,%ebp + bswap %ebx + bswap %edx + bswap %ecx + bswap %ebp + emms + movl %ebx,12(%edi) + movl %edx,4(%edi) + movl %ecx,8(%edi) + movl %ebp,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin +.globl gcm_ghash_4bit_mmx +.type gcm_ghash_4bit_mmx,@function +.align 16 +gcm_ghash_4bit_mmx: +.L_gcm_ghash_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call .L008pic_point +.L008pic_point: + popl %esi + leal .Lrem_8bit-.L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx +.align 16 +.L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne .L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call .L010pic +.L010pic: + popl %ecx + leal .Lbswap-.L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + movdqu %xmm2,(%edx) + movdqu %xmm0,16(%edx) + ret +.size gcm_init_clmul,.-.L_gcm_init_clmul_begin +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call .L011pic +.L011pic: + popl %ecx + leal .Lbswap-.L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: +.L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call .L012pic +.L012pic: + popl %ecx + leal .Lbswap-.L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz .L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 + movups 16(%edx),%xmm2 + leal 32(%esi),%esi + subl $32,%ebx + jbe .L014even_tail +.L015mod_loop: + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqu (%esi),%xmm3 + movups (%edx),%xmm2 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqa %xmm6,%xmm5 + movdqa %xmm6,%xmm7 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pshufd $78,%xmm5,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm5,%xmm3 + pshufd $78,%xmm2,%xmm5 + pxor %xmm2,%xmm5 +.byte 102,15,58,68,250,17 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm3 + movdqa %xmm3,%xmm5 + psrldq $8,%xmm3 + pslldq $8,%xmm5 + pxor %xmm3,%xmm7 + pxor %xmm5,%xmm6 + movdqa (%ecx),%xmm5 + leal 32(%esi),%esi + subl $32,%ebx + ja .L015mod_loop +.L014even_tail: + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + testl %ebx,%ebx + jnz .L016done + movups (%edx),%xmm2 +.L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin +.align 64 +.Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.align 64 +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.align 64 +.Lrem_8bit: +.value 0,450,900,582,1800,1738,1164,1358 +.value 3600,4050,3476,3158,2328,2266,2716,2910 +.value 7200,7650,8100,7782,6952,6890,6316,6510 +.value 4656,5106,4532,4214,5432,5370,5820,6014 +.value 14400,14722,15300,14854,16200,16010,15564,15630 +.value 13904,14226,13780,13334,12632,12442,13020,13086 +.value 9312,9634,10212,9766,9064,8874,8428,8494 +.value 10864,11186,10740,10294,11640,11450,12028,12094 +.value 28800,28994,29444,29382,30600,30282,29708,30158 +.value 32400,32594,32020,31958,31128,30810,31260,31710 +.value 27808,28002,28452,28390,27560,27242,26668,27118 +.value 25264,25458,24884,24822,26040,25722,26172,26622 +.value 18624,18690,19268,19078,20424,19978,19532,19854 +.value 18128,18194,17748,17558,16856,16410,16988,17310 +.value 21728,21794,22372,22182,21480,21034,20588,20910 +.value 23280,23346,22900,22710,24056,23610,24188,24510 +.value 57600,57538,57988,58182,58888,59338,58764,58446 +.value 61200,61138,60564,60758,59416,59866,60316,59998 +.value 64800,64738,65188,65382,64040,64490,63916,63598 +.value 62256,62194,61620,61814,62520,62970,63420,63102 +.value 55616,55426,56004,56070,56904,57226,56780,56334 +.value 55120,54930,54484,54550,53336,53658,54236,53790 +.value 50528,50338,50916,50982,49768,50090,49644,49198 +.value 52080,51890,51444,51510,52344,52666,53244,52798 +.value 37248,36930,37380,37830,38536,38730,38156,38094 +.value 40848,40530,39956,40406,39064,39258,39708,39646 +.value 36256,35938,36388,36838,35496,35690,35116,35054 +.value 33712,33394,32820,33270,33976,34170,34620,34558 +.value 43456,43010,43588,43910,44744,44810,44364,44174 +.value 42960,42514,42068,42390,41176,41242,41820,41630 +.value 46560,46114,46692,47014,45800,45866,45420,45230 +.value 48112,47666,47220,47542,48376,48442,49020,48830 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 diff --git a/app/openssl/crypto/modes/asm/ghash-x86.pl b/app/openssl/crypto/modes/asm/ghash-x86.pl new file mode 100644 index 00000000..2426cd0c --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-x86.pl @@ -0,0 +1,1342 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# March, May, June 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that it +# uses 256 bytes per-key table [+64/128 bytes fixed table]. It has two +# code paths: vanilla x86 and vanilla MMX. Former will be executed on +# 486 and Pentium, latter on all others. MMX GHASH features so called +# "528B" variant of "4-bit" method utilizing additional 256+16 bytes +# of per-key storage [+512 bytes shared table]. Performance results +# are for streamed GHASH subroutine and are expressed in cycles per +# processed byte, less is better: +# +# gcc 2.95.3(*) MMX assembler x86 assembler +# +# Pentium 105/111(**) - 50 +# PIII 68 /75 12.2 24 +# P4 125/125 17.8 84(***) +# Opteron 66 /70 10.1 30 +# Core2 54 /67 8.4 18 +# +# (*) gcc 3.4.x was observed to generate few percent slower code, +# which is one of reasons why 2.95.3 results were chosen, +# another reason is lack of 3.4.x results for older CPUs; +# comparison with MMX results is not completely fair, because C +# results are for vanilla "256B" implementation, while +# assembler results are for "528B";-) +# (**) second number is result for code compiled with -fPIC flag, +# which is actually more relevant, because assembler code is +# position-independent; +# (***) see comment in non-MMX routine for further details; +# +# To summarize, it's >2-5 times faster than gcc-generated code. To +# anchor it to something else SHA1 assembler processes one byte in +# 11-13 cycles on contemporary x86 cores. As for choice of MMX in +# particular, see comment at the end of the file... + +# May 2010 +# +# Add PCLMULQDQ version performing at 2.10 cycles per processed byte. +# The question is how close is it to theoretical limit? The pclmulqdq +# instruction latency appears to be 14 cycles and there can't be more +# than 2 of them executing at any given time. This means that single +# Karatsuba multiplication would take 28 cycles *plus* few cycles for +# pre- and post-processing. Then multiplication has to be followed by +# modulo-reduction. Given that aggregated reduction method [see +# "Carry-less Multiplication and Its Usage for Computing the GCM Mode" +# white paper by Intel] allows you to perform reduction only once in +# a while we can assume that asymptotic performance can be estimated +# as (28+Tmod/Naggr)/16, where Tmod is time to perform reduction +# and Naggr is the aggregation factor. +# +# Before we proceed to this implementation let's have closer look at +# the best-performing code suggested by Intel in their white paper. +# By tracing inter-register dependencies Tmod is estimated as ~19 +# cycles and Naggr chosen by Intel is 4, resulting in 2.05 cycles per +# processed byte. As implied, this is quite optimistic estimate, +# because it does not account for Karatsuba pre- and post-processing, +# which for a single multiplication is ~5 cycles. Unfortunately Intel +# does not provide performance data for GHASH alone. But benchmarking +# AES_GCM_encrypt ripped out of Fig. 15 of the white paper with aadt +# alone resulted in 2.46 cycles per byte of out 16KB buffer. Note that +# the result accounts even for pre-computing of degrees of the hash +# key H, but its portion is negligible at 16KB buffer size. +# +# Moving on to the implementation in question. Tmod is estimated as +# ~13 cycles and Naggr is 2, giving asymptotic performance of ... +# 2.16. How is it possible that measured performance is better than +# optimistic theoretical estimate? There is one thing Intel failed +# to recognize. By serializing GHASH with CTR in same subroutine +# former's performance is really limited to above (Tmul + Tmod/Naggr) +# equation. But if GHASH procedure is detached, the modulo-reduction +# can be interleaved with Naggr-1 multiplications at instruction level +# and under ideal conditions even disappear from the equation. So that +# optimistic theoretical estimate for this implementation is ... +# 28/16=1.75, and not 2.16. Well, it's probably way too optimistic, +# at least for such small Naggr. I'd argue that (28+Tproc/Naggr), +# where Tproc is time required for Karatsuba pre- and post-processing, +# is more realistic estimate. In this case it gives ... 1.91 cycles. +# Or in other words, depending on how well we can interleave reduction +# and one of the two multiplications the performance should be betwen +# 1.91 and 2.16. As already mentioned, this implementation processes +# one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart +# - in 2.02. x86_64 performance is better, because larger register +# bank allows to interleave reduction and multiplication better. +# +# Does it make sense to increase Naggr? To start with it's virtually +# impossible in 32-bit mode, because of limited register bank +# capacity. Otherwise improvement has to be weighed agiainst slower +# setup, as well as code size and complexity increase. As even +# optimistic estimate doesn't promise 30% performance improvement, +# there are currently no plans to increase Naggr. +# +# Special thanks to David Woodhouse for +# providing access to a Westmere-based system on behalf of Intel +# Open Source Technology Centre. + +# January 2010 +# +# Tweaked to optimize transitions between integer and FP operations +# on same XMM register, PCLMULQDQ subroutine was measured to process +# one byte in 2.07 cycles on Sandy Bridge, and in 2.12 - on Westmere. +# The minor regression on Westmere is outweighed by ~15% improvement +# on Sandy Bridge. Strangely enough attempt to modify 64-bit code in +# similar manner resulted in almost 20% degradation on Sandy Bridge, +# where original 64-bit code processes one byte in 1.95 cycles. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); + +$sse2=0; +for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } + +($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); +$inp = "edi"; +$Htbl = "esi"; + +$unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse + # than unrolled, which has to be weighted against + # 2.5x x86-specific code size reduction. + +sub x86_loop { + my $off = shift; + my $rem = "eax"; + + &mov ($Zhh,&DWP(4,$Htbl,$Zll)); + &mov ($Zhl,&DWP(0,$Htbl,$Zll)); + &mov ($Zlh,&DWP(12,$Htbl,$Zll)); + &mov ($Zll,&DWP(8,$Htbl,$Zll)); + &xor ($rem,$rem); # avoid partial register stalls on PIII + + # shrd practically kills P4, 2.5x deterioration, but P4 has + # MMX code-path to execute. shrd runs tad faster [than twice + # the shifts, move's and or's] on pre-MMX Pentium (as well as + # PIII and Core2), *but* minimizes code size, spares register + # and thus allows to fold the loop... + if (!$unroll) { + my $cnt = $inp; + &mov ($cnt,15); + &jmp (&label("x86_loop")); + &set_label("x86_loop",16); + for($i=1;$i<=2;$i++) { + &mov (&LB($rem),&LB($Zll)); + &shrd ($Zll,$Zlh,4); + &and (&LB($rem),0xf); + &shrd ($Zlh,$Zhl,4); + &shrd ($Zhl,$Zhh,4); + &shr ($Zhh,4); + &xor ($Zhh,&DWP($off+16,"esp",$rem,4)); + + &mov (&LB($rem),&BP($off,"esp",$cnt)); + if ($i&1) { + &and (&LB($rem),0xf0); + } else { + &shl (&LB($rem),4); + } + + &xor ($Zll,&DWP(8,$Htbl,$rem)); + &xor ($Zlh,&DWP(12,$Htbl,$rem)); + &xor ($Zhl,&DWP(0,$Htbl,$rem)); + &xor ($Zhh,&DWP(4,$Htbl,$rem)); + + if ($i&1) { + &dec ($cnt); + &js (&label("x86_break")); + } else { + &jmp (&label("x86_loop")); + } + } + &set_label("x86_break",16); + } else { + for($i=1;$i<32;$i++) { + &comment($i); + &mov (&LB($rem),&LB($Zll)); + &shrd ($Zll,$Zlh,4); + &and (&LB($rem),0xf); + &shrd ($Zlh,$Zhl,4); + &shrd ($Zhl,$Zhh,4); + &shr ($Zhh,4); + &xor ($Zhh,&DWP($off+16,"esp",$rem,4)); + + if ($i&1) { + &mov (&LB($rem),&BP($off+15-($i>>1),"esp")); + &and (&LB($rem),0xf0); + } else { + &mov (&LB($rem),&BP($off+15-($i>>1),"esp")); + &shl (&LB($rem),4); + } + + &xor ($Zll,&DWP(8,$Htbl,$rem)); + &xor ($Zlh,&DWP(12,$Htbl,$rem)); + &xor ($Zhl,&DWP(0,$Htbl,$rem)); + &xor ($Zhh,&DWP(4,$Htbl,$rem)); + } + } + &bswap ($Zll); + &bswap ($Zlh); + &bswap ($Zhl); + if (!$x86only) { + &bswap ($Zhh); + } else { + &mov ("eax",$Zhh); + &bswap ("eax"); + &mov ($Zhh,"eax"); + } +} + +if ($unroll) { + &function_begin_B("_x86_gmult_4bit_inner"); + &x86_loop(4); + &ret (); + &function_end_B("_x86_gmult_4bit_inner"); +} + +sub deposit_rem_4bit { + my $bias = shift; + + &mov (&DWP($bias+0, "esp"),0x0000<<16); + &mov (&DWP($bias+4, "esp"),0x1C20<<16); + &mov (&DWP($bias+8, "esp"),0x3840<<16); + &mov (&DWP($bias+12,"esp"),0x2460<<16); + &mov (&DWP($bias+16,"esp"),0x7080<<16); + &mov (&DWP($bias+20,"esp"),0x6CA0<<16); + &mov (&DWP($bias+24,"esp"),0x48C0<<16); + &mov (&DWP($bias+28,"esp"),0x54E0<<16); + &mov (&DWP($bias+32,"esp"),0xE100<<16); + &mov (&DWP($bias+36,"esp"),0xFD20<<16); + &mov (&DWP($bias+40,"esp"),0xD940<<16); + &mov (&DWP($bias+44,"esp"),0xC560<<16); + &mov (&DWP($bias+48,"esp"),0x9180<<16); + &mov (&DWP($bias+52,"esp"),0x8DA0<<16); + &mov (&DWP($bias+56,"esp"),0xA9C0<<16); + &mov (&DWP($bias+60,"esp"),0xB5E0<<16); +} + +$suffix = $x86only ? "" : "_x86"; + +&function_begin("gcm_gmult_4bit".$suffix); + &stack_push(16+4+1); # +1 for stack alignment + &mov ($inp,&wparam(0)); # load Xi + &mov ($Htbl,&wparam(1)); # load Htable + + &mov ($Zhh,&DWP(0,$inp)); # load Xi[16] + &mov ($Zhl,&DWP(4,$inp)); + &mov ($Zlh,&DWP(8,$inp)); + &mov ($Zll,&DWP(12,$inp)); + + &deposit_rem_4bit(16); + + &mov (&DWP(0,"esp"),$Zhh); # copy Xi[16] on stack + &mov (&DWP(4,"esp"),$Zhl); + &mov (&DWP(8,"esp"),$Zlh); + &mov (&DWP(12,"esp"),$Zll); + &shr ($Zll,20); + &and ($Zll,0xf0); + + if ($unroll) { + &call ("_x86_gmult_4bit_inner"); + } else { + &x86_loop(0); + &mov ($inp,&wparam(0)); + } + + &mov (&DWP(12,$inp),$Zll); + &mov (&DWP(8,$inp),$Zlh); + &mov (&DWP(4,$inp),$Zhl); + &mov (&DWP(0,$inp),$Zhh); + &stack_pop(16+4+1); +&function_end("gcm_gmult_4bit".$suffix); + +&function_begin("gcm_ghash_4bit".$suffix); + &stack_push(16+4+1); # +1 for 64-bit alignment + &mov ($Zll,&wparam(0)); # load Xi + &mov ($Htbl,&wparam(1)); # load Htable + &mov ($inp,&wparam(2)); # load in + &mov ("ecx",&wparam(3)); # load len + &add ("ecx",$inp); + &mov (&wparam(3),"ecx"); + + &mov ($Zhh,&DWP(0,$Zll)); # load Xi[16] + &mov ($Zhl,&DWP(4,$Zll)); + &mov ($Zlh,&DWP(8,$Zll)); + &mov ($Zll,&DWP(12,$Zll)); + + &deposit_rem_4bit(16); + + &set_label("x86_outer_loop",16); + &xor ($Zll,&DWP(12,$inp)); # xor with input + &xor ($Zlh,&DWP(8,$inp)); + &xor ($Zhl,&DWP(4,$inp)); + &xor ($Zhh,&DWP(0,$inp)); + &mov (&DWP(12,"esp"),$Zll); # dump it on stack + &mov (&DWP(8,"esp"),$Zlh); + &mov (&DWP(4,"esp"),$Zhl); + &mov (&DWP(0,"esp"),$Zhh); + + &shr ($Zll,20); + &and ($Zll,0xf0); + + if ($unroll) { + &call ("_x86_gmult_4bit_inner"); + } else { + &x86_loop(0); + &mov ($inp,&wparam(2)); + } + &lea ($inp,&DWP(16,$inp)); + &cmp ($inp,&wparam(3)); + &mov (&wparam(2),$inp) if (!$unroll); + &jb (&label("x86_outer_loop")); + + &mov ($inp,&wparam(0)); # load Xi + &mov (&DWP(12,$inp),$Zll); + &mov (&DWP(8,$inp),$Zlh); + &mov (&DWP(4,$inp),$Zhl); + &mov (&DWP(0,$inp),$Zhh); + &stack_pop(16+4+1); +&function_end("gcm_ghash_4bit".$suffix); + +if (!$x86only) {{{ + +&static_label("rem_4bit"); + +if (!$sse2) {{ # pure-MMX "May" version... + +$S=12; # shift factor for rem_4bit + +&function_begin_B("_mmx_gmult_4bit_inner"); +# MMX version performs 3.5 times better on P4 (see comment in non-MMX +# routine for further details), 100% better on Opteron, ~70% better +# on Core2 and PIII... In other words effort is considered to be well +# spent... Since initial release the loop was unrolled in order to +# "liberate" register previously used as loop counter. Instead it's +# used to optimize critical path in 'Z.hi ^= rem_4bit[Z.lo&0xf]'. +# The path involves move of Z.lo from MMX to integer register, +# effective address calculation and finally merge of value to Z.hi. +# Reference to rem_4bit is scheduled so late that I had to >>4 +# rem_4bit elements. This resulted in 20-45% procent improvement +# on contemporary µ-archs. +{ + my $cnt; + my $rem_4bit = "eax"; + my @rem = ($Zhh,$Zll); + my $nhi = $Zhl; + my $nlo = $Zlh; + + my ($Zlo,$Zhi) = ("mm0","mm1"); + my $tmp = "mm2"; + + &xor ($nlo,$nlo); # avoid partial register stalls on PIII + &mov ($nhi,$Zll); + &mov (&LB($nlo),&LB($nhi)); + &shl (&LB($nlo),4); + &and ($nhi,0xf0); + &movq ($Zlo,&QWP(8,$Htbl,$nlo)); + &movq ($Zhi,&QWP(0,$Htbl,$nlo)); + &movd ($rem[0],$Zlo); + + for ($cnt=28;$cnt>=-2;$cnt--) { + my $odd = $cnt&1; + my $nix = $odd ? $nlo : $nhi; + + &shl (&LB($nlo),4) if ($odd); + &psrlq ($Zlo,4); + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &pxor ($Zlo,&QWP(8,$Htbl,$nix)); + &mov (&LB($nlo),&BP($cnt/2,$inp)) if (!$odd && $cnt>=0); + &psllq ($tmp,60); + &and ($nhi,0xf0) if ($odd); + &pxor ($Zhi,&QWP(0,$rem_4bit,$rem[1],8)) if ($cnt<28); + &and ($rem[0],0xf); + &pxor ($Zhi,&QWP(0,$Htbl,$nix)); + &mov ($nhi,$nlo) if (!$odd && $cnt>=0); + &movd ($rem[1],$Zlo); + &pxor ($Zlo,$tmp); + + push (@rem,shift(@rem)); # "rotate" registers + } + + &mov ($inp,&DWP(4,$rem_4bit,$rem[1],8)); # last rem_4bit[rem] + + &psrlq ($Zlo,32); # lower part of Zlo is already there + &movd ($Zhl,$Zhi); + &psrlq ($Zhi,32); + &movd ($Zlh,$Zlo); + &movd ($Zhh,$Zhi); + &shl ($inp,4); # compensate for rem_4bit[i] being >>4 + + &bswap ($Zll); + &bswap ($Zhl); + &bswap ($Zlh); + &xor ($Zhh,$inp); + &bswap ($Zhh); + + &ret (); +} +&function_end_B("_mmx_gmult_4bit_inner"); + +&function_begin("gcm_gmult_4bit_mmx"); + &mov ($inp,&wparam(0)); # load Xi + &mov ($Htbl,&wparam(1)); # load Htable + + &call (&label("pic_point")); + &set_label("pic_point"); + &blindpop("eax"); + &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + + &movz ($Zll,&BP(15,$inp)); + + &call ("_mmx_gmult_4bit_inner"); + + &mov ($inp,&wparam(0)); # load Xi + &emms (); + &mov (&DWP(12,$inp),$Zll); + &mov (&DWP(4,$inp),$Zhl); + &mov (&DWP(8,$inp),$Zlh); + &mov (&DWP(0,$inp),$Zhh); +&function_end("gcm_gmult_4bit_mmx"); + +# Streamed version performs 20% better on P4, 7% on Opteron, +# 10% on Core2 and PIII... +&function_begin("gcm_ghash_4bit_mmx"); + &mov ($Zhh,&wparam(0)); # load Xi + &mov ($Htbl,&wparam(1)); # load Htable + &mov ($inp,&wparam(2)); # load in + &mov ($Zlh,&wparam(3)); # load len + + &call (&label("pic_point")); + &set_label("pic_point"); + &blindpop("eax"); + &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + + &add ($Zlh,$inp); + &mov (&wparam(3),$Zlh); # len to point at the end of input + &stack_push(4+1); # +1 for stack alignment + + &mov ($Zll,&DWP(12,$Zhh)); # load Xi[16] + &mov ($Zhl,&DWP(4,$Zhh)); + &mov ($Zlh,&DWP(8,$Zhh)); + &mov ($Zhh,&DWP(0,$Zhh)); + &jmp (&label("mmx_outer_loop")); + + &set_label("mmx_outer_loop",16); + &xor ($Zll,&DWP(12,$inp)); + &xor ($Zhl,&DWP(4,$inp)); + &xor ($Zlh,&DWP(8,$inp)); + &xor ($Zhh,&DWP(0,$inp)); + &mov (&wparam(2),$inp); + &mov (&DWP(12,"esp"),$Zll); + &mov (&DWP(4,"esp"),$Zhl); + &mov (&DWP(8,"esp"),$Zlh); + &mov (&DWP(0,"esp"),$Zhh); + + &mov ($inp,"esp"); + &shr ($Zll,24); + + &call ("_mmx_gmult_4bit_inner"); + + &mov ($inp,&wparam(2)); + &lea ($inp,&DWP(16,$inp)); + &cmp ($inp,&wparam(3)); + &jb (&label("mmx_outer_loop")); + + &mov ($inp,&wparam(0)); # load Xi + &emms (); + &mov (&DWP(12,$inp),$Zll); + &mov (&DWP(4,$inp),$Zhl); + &mov (&DWP(8,$inp),$Zlh); + &mov (&DWP(0,$inp),$Zhh); + + &stack_pop(4+1); +&function_end("gcm_ghash_4bit_mmx"); + +}} else {{ # "June" MMX version... + # ... has slower "April" gcm_gmult_4bit_mmx with folded + # loop. This is done to conserve code size... +$S=16; # shift factor for rem_4bit + +sub mmx_loop() { +# MMX version performs 2.8 times better on P4 (see comment in non-MMX +# routine for further details), 40% better on Opteron and Core2, 50% +# better on PIII... In other words effort is considered to be well +# spent... + my $inp = shift; + my $rem_4bit = shift; + my $cnt = $Zhh; + my $nhi = $Zhl; + my $nlo = $Zlh; + my $rem = $Zll; + + my ($Zlo,$Zhi) = ("mm0","mm1"); + my $tmp = "mm2"; + + &xor ($nlo,$nlo); # avoid partial register stalls on PIII + &mov ($nhi,$Zll); + &mov (&LB($nlo),&LB($nhi)); + &mov ($cnt,14); + &shl (&LB($nlo),4); + &and ($nhi,0xf0); + &movq ($Zlo,&QWP(8,$Htbl,$nlo)); + &movq ($Zhi,&QWP(0,$Htbl,$nlo)); + &movd ($rem,$Zlo); + &jmp (&label("mmx_loop")); + + &set_label("mmx_loop",16); + &psrlq ($Zlo,4); + &and ($rem,0xf); + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &pxor ($Zlo,&QWP(8,$Htbl,$nhi)); + &mov (&LB($nlo),&BP(0,$inp,$cnt)); + &psllq ($tmp,60); + &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); + &dec ($cnt); + &movd ($rem,$Zlo); + &pxor ($Zhi,&QWP(0,$Htbl,$nhi)); + &mov ($nhi,$nlo); + &pxor ($Zlo,$tmp); + &js (&label("mmx_break")); + + &shl (&LB($nlo),4); + &and ($rem,0xf); + &psrlq ($Zlo,4); + &and ($nhi,0xf0); + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &pxor ($Zlo,&QWP(8,$Htbl,$nlo)); + &psllq ($tmp,60); + &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); + &movd ($rem,$Zlo); + &pxor ($Zhi,&QWP(0,$Htbl,$nlo)); + &pxor ($Zlo,$tmp); + &jmp (&label("mmx_loop")); + + &set_label("mmx_break",16); + &shl (&LB($nlo),4); + &and ($rem,0xf); + &psrlq ($Zlo,4); + &and ($nhi,0xf0); + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &pxor ($Zlo,&QWP(8,$Htbl,$nlo)); + &psllq ($tmp,60); + &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); + &movd ($rem,$Zlo); + &pxor ($Zhi,&QWP(0,$Htbl,$nlo)); + &pxor ($Zlo,$tmp); + + &psrlq ($Zlo,4); + &and ($rem,0xf); + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &pxor ($Zlo,&QWP(8,$Htbl,$nhi)); + &psllq ($tmp,60); + &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); + &movd ($rem,$Zlo); + &pxor ($Zhi,&QWP(0,$Htbl,$nhi)); + &pxor ($Zlo,$tmp); + + &psrlq ($Zlo,32); # lower part of Zlo is already there + &movd ($Zhl,$Zhi); + &psrlq ($Zhi,32); + &movd ($Zlh,$Zlo); + &movd ($Zhh,$Zhi); + + &bswap ($Zll); + &bswap ($Zhl); + &bswap ($Zlh); + &bswap ($Zhh); +} + +&function_begin("gcm_gmult_4bit_mmx"); + &mov ($inp,&wparam(0)); # load Xi + &mov ($Htbl,&wparam(1)); # load Htable + + &call (&label("pic_point")); + &set_label("pic_point"); + &blindpop("eax"); + &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + + &movz ($Zll,&BP(15,$inp)); + + &mmx_loop($inp,"eax"); + + &emms (); + &mov (&DWP(12,$inp),$Zll); + &mov (&DWP(4,$inp),$Zhl); + &mov (&DWP(8,$inp),$Zlh); + &mov (&DWP(0,$inp),$Zhh); +&function_end("gcm_gmult_4bit_mmx"); + +###################################################################### +# Below subroutine is "528B" variant of "4-bit" GCM GHASH function +# (see gcm128.c for details). It provides further 20-40% performance +# improvement over above mentioned "May" version. + +&static_label("rem_8bit"); + +&function_begin("gcm_ghash_4bit_mmx"); +{ my ($Zlo,$Zhi) = ("mm7","mm6"); + my $rem_8bit = "esi"; + my $Htbl = "ebx"; + + # parameter block + &mov ("eax",&wparam(0)); # Xi + &mov ("ebx",&wparam(1)); # Htable + &mov ("ecx",&wparam(2)); # inp + &mov ("edx",&wparam(3)); # len + &mov ("ebp","esp"); # original %esp + &call (&label("pic_point")); + &set_label ("pic_point"); + &blindpop ($rem_8bit); + &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit)); + + &sub ("esp",512+16+16); # allocate stack frame... + &and ("esp",-64); # ...and align it + &sub ("esp",16); # place for (u8)(H[]<<4) + + &add ("edx","ecx"); # pointer to the end of input + &mov (&DWP(528+16+0,"esp"),"eax"); # save Xi + &mov (&DWP(528+16+8,"esp"),"edx"); # save inp+len + &mov (&DWP(528+16+12,"esp"),"ebp"); # save original %esp + + { my @lo = ("mm0","mm1","mm2"); + my @hi = ("mm3","mm4","mm5"); + my @tmp = ("mm6","mm7"); + my ($off1,$off2,$i) = (0,0,); + + &add ($Htbl,128); # optimize for size + &lea ("edi",&DWP(16+128,"esp")); + &lea ("ebp",&DWP(16+256+128,"esp")); + + # decompose Htable (low and high parts are kept separately), + # generate Htable[]>>4, (u8)(Htable[]<<4), save to stack... + for ($i=0;$i<18;$i++) { + + &mov ("edx",&DWP(16*$i+8-128,$Htbl)) if ($i<16); + &movq ($lo[0],&QWP(16*$i+8-128,$Htbl)) if ($i<16); + &psllq ($tmp[1],60) if ($i>1); + &movq ($hi[0],&QWP(16*$i+0-128,$Htbl)) if ($i<16); + &por ($lo[2],$tmp[1]) if ($i>1); + &movq (&QWP($off1-128,"edi"),$lo[1]) if ($i>0 && $i<17); + &psrlq ($lo[1],4) if ($i>0 && $i<17); + &movq (&QWP($off1,"edi"),$hi[1]) if ($i>0 && $i<17); + &movq ($tmp[0],$hi[1]) if ($i>0 && $i<17); + &movq (&QWP($off2-128,"ebp"),$lo[2]) if ($i>1); + &psrlq ($hi[1],4) if ($i>0 && $i<17); + &movq (&QWP($off2,"ebp"),$hi[2]) if ($i>1); + &shl ("edx",4) if ($i<16); + &mov (&BP($i,"esp"),&LB("edx")) if ($i<16); + + unshift (@lo,pop(@lo)); # "rotate" registers + unshift (@hi,pop(@hi)); + unshift (@tmp,pop(@tmp)); + $off1 += 8 if ($i>0); + $off2 += 8 if ($i>1); + } + } + + &movq ($Zhi,&QWP(0,"eax")); + &mov ("ebx",&DWP(8,"eax")); + &mov ("edx",&DWP(12,"eax")); # load Xi + +&set_label("outer",16); + { my $nlo = "eax"; + my $dat = "edx"; + my @nhi = ("edi","ebp"); + my @rem = ("ebx","ecx"); + my @red = ("mm0","mm1","mm2"); + my $tmp = "mm3"; + + &xor ($dat,&DWP(12,"ecx")); # merge input data + &xor ("ebx",&DWP(8,"ecx")); + &pxor ($Zhi,&QWP(0,"ecx")); + &lea ("ecx",&DWP(16,"ecx")); # inp+=16 + #&mov (&DWP(528+12,"esp"),$dat); # save inp^Xi + &mov (&DWP(528+8,"esp"),"ebx"); + &movq (&QWP(528+0,"esp"),$Zhi); + &mov (&DWP(528+16+4,"esp"),"ecx"); # save inp + + &xor ($nlo,$nlo); + &rol ($dat,8); + &mov (&LB($nlo),&LB($dat)); + &mov ($nhi[1],$nlo); + &and (&LB($nlo),0x0f); + &shr ($nhi[1],4); + &pxor ($red[0],$red[0]); + &rol ($dat,8); # next byte + &pxor ($red[1],$red[1]); + &pxor ($red[2],$red[2]); + + # Just like in "May" verson modulo-schedule for critical path in + # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' + # is scheduled so late that rem_8bit[] has to be shifted *right* + # by 16, which is why last argument to pinsrw is 2, which + # corresponds to <<32=<<48>>16... + for ($j=11,$i=0;$i<15;$i++) { + + if ($i>0) { + &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo] + &rol ($dat,8); # next byte + &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8)); + + &pxor ($Zlo,$tmp); + &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8)); + &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4) + } else { + &movq ($Zlo,&QWP(16,"esp",$nlo,8)); + &movq ($Zhi,&QWP(16+128,"esp",$nlo,8)); + } + + &mov (&LB($nlo),&LB($dat)); + &mov ($dat,&DWP(528+$j,"esp")) if (--$j%4==0); + + &movd ($rem[0],$Zlo); + &movz ($rem[1],&LB($rem[1])) if ($i>0); + &psrlq ($Zlo,8); # Z>>=8 + + &movq ($tmp,$Zhi); + &mov ($nhi[0],$nlo); + &psrlq ($Zhi,8); + + &pxor ($Zlo,&QWP(16+256+0,"esp",$nhi[1],8)); # Z^=H[nhi]>>4 + &and (&LB($nlo),0x0f); + &psllq ($tmp,56); + + &pxor ($Zhi,$red[1]) if ($i>1); + &shr ($nhi[0],4); + &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2) if ($i>0); + + unshift (@red,pop(@red)); # "rotate" registers + unshift (@rem,pop(@rem)); + unshift (@nhi,pop(@nhi)); + } + + &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo] + &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8)); + &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4) + + &pxor ($Zlo,$tmp); + &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8)); + &movz ($rem[1],&LB($rem[1])); + + &pxor ($red[2],$red[2]); # clear 2nd word + &psllq ($red[1],4); + + &movd ($rem[0],$Zlo); + &psrlq ($Zlo,4); # Z>>=4 + + &movq ($tmp,$Zhi); + &psrlq ($Zhi,4); + &shl ($rem[0],4); # rem<<4 + + &pxor ($Zlo,&QWP(16,"esp",$nhi[1],8)); # Z^=H[nhi] + &psllq ($tmp,60); + &movz ($rem[0],&LB($rem[0])); + + &pxor ($Zlo,$tmp); + &pxor ($Zhi,&QWP(16+128,"esp",$nhi[1],8)); + + &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2); + &pxor ($Zhi,$red[1]); + + &movd ($dat,$Zlo); + &pinsrw ($red[2],&WP(0,$rem_8bit,$rem[0],2),3); # last is <<48 + + &psllq ($red[0],12); # correct by <<16>>4 + &pxor ($Zhi,$red[0]); + &psrlq ($Zlo,32); + &pxor ($Zhi,$red[2]); + + &mov ("ecx",&DWP(528+16+4,"esp")); # restore inp + &movd ("ebx",$Zlo); + &movq ($tmp,$Zhi); # 01234567 + &psllw ($Zhi,8); # 1.3.5.7. + &psrlw ($tmp,8); # .0.2.4.6 + &por ($Zhi,$tmp); # 10325476 + &bswap ($dat); + &pshufw ($Zhi,$Zhi,0b00011011); # 76543210 + &bswap ("ebx"); + + &cmp ("ecx",&DWP(528+16+8,"esp")); # are we done? + &jne (&label("outer")); + } + + &mov ("eax",&DWP(528+16+0,"esp")); # restore Xi + &mov (&DWP(12,"eax"),"edx"); + &mov (&DWP(8,"eax"),"ebx"); + &movq (&QWP(0,"eax"),$Zhi); + + &mov ("esp",&DWP(528+16+12,"esp")); # restore original %esp + &emms (); +} +&function_end("gcm_ghash_4bit_mmx"); +}} + +if ($sse2) {{ +###################################################################### +# PCLMULQDQ version. + +$Xip="eax"; +$Htbl="edx"; +$const="ecx"; +$inp="esi"; +$len="ebx"; + +($Xi,$Xhi)=("xmm0","xmm1"); $Hkey="xmm2"; +($T1,$T2,$T3)=("xmm3","xmm4","xmm5"); +($Xn,$Xhn)=("xmm6","xmm7"); + +&static_label("bswap"); + +sub clmul64x64_T2 { # minimal "register" pressure +my ($Xhi,$Xi,$Hkey)=@_; + + &movdqa ($Xhi,$Xi); # + &pshufd ($T1,$Xi,0b01001110); + &pshufd ($T2,$Hkey,0b01001110); + &pxor ($T1,$Xi); # + &pxor ($T2,$Hkey); + + &pclmulqdq ($Xi,$Hkey,0x00); ####### + &pclmulqdq ($Xhi,$Hkey,0x11); ####### + &pclmulqdq ($T1,$T2,0x00); ####### + &xorps ($T1,$Xi); # + &xorps ($T1,$Xhi); # + + &movdqa ($T2,$T1); # + &psrldq ($T1,8); + &pslldq ($T2,8); # + &pxor ($Xhi,$T1); + &pxor ($Xi,$T2); # +} + +sub clmul64x64_T3 { +# Even though this subroutine offers visually better ILP, it +# was empirically found to be a tad slower than above version. +# At least in gcm_ghash_clmul context. But it's just as well, +# because loop modulo-scheduling is possible only thanks to +# minimized "register" pressure... +my ($Xhi,$Xi,$Hkey)=@_; + + &movdqa ($T1,$Xi); # + &movdqa ($Xhi,$Xi); + &pclmulqdq ($Xi,$Hkey,0x00); ####### + &pclmulqdq ($Xhi,$Hkey,0x11); ####### + &pshufd ($T2,$T1,0b01001110); # + &pshufd ($T3,$Hkey,0b01001110); + &pxor ($T2,$T1); # + &pxor ($T3,$Hkey); + &pclmulqdq ($T2,$T3,0x00); ####### + &pxor ($T2,$Xi); # + &pxor ($T2,$Xhi); # + + &movdqa ($T3,$T2); # + &psrldq ($T2,8); + &pslldq ($T3,8); # + &pxor ($Xhi,$T2); + &pxor ($Xi,$T3); # +} + +if (1) { # Algorithm 9 with <<1 twist. + # Reduction is shorter and uses only two + # temporary registers, which makes it better + # candidate for interleaving with 64x64 + # multiplication. Pre-modulo-scheduled loop + # was found to be ~20% faster than Algorithm 5 + # below. Algorithm 9 was therefore chosen for + # further optimization... + +sub reduction_alg9 { # 17/13 times faster than Intel version +my ($Xhi,$Xi) = @_; + + # 1st phase + &movdqa ($T1,$Xi); # + &psllq ($Xi,1); + &pxor ($Xi,$T1); # + &psllq ($Xi,5); # + &pxor ($Xi,$T1); # + &psllq ($Xi,57); # + &movdqa ($T2,$Xi); # + &pslldq ($Xi,8); + &psrldq ($T2,8); # + &pxor ($Xi,$T1); + &pxor ($Xhi,$T2); # + + # 2nd phase + &movdqa ($T2,$Xi); + &psrlq ($Xi,5); + &pxor ($Xi,$T2); # + &psrlq ($Xi,1); # + &pxor ($Xi,$T2); # + &pxor ($T2,$Xhi); + &psrlq ($Xi,1); # + &pxor ($Xi,$T2); # +} + +&function_begin_B("gcm_init_clmul"); + &mov ($Htbl,&wparam(0)); + &mov ($Xip,&wparam(1)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Hkey,&QWP(0,$Xip)); + &pshufd ($Hkey,$Hkey,0b01001110);# dword swap + + # <<1 twist + &pshufd ($T2,$Hkey,0b11111111); # broadcast uppermost dword + &movdqa ($T1,$Hkey); + &psllq ($Hkey,1); + &pxor ($T3,$T3); # + &psrlq ($T1,63); + &pcmpgtd ($T3,$T2); # broadcast carry bit + &pslldq ($T1,8); + &por ($Hkey,$T1); # H<<=1 + + # magic reduction + &pand ($T3,&QWP(16,$const)); # 0x1c2_polynomial + &pxor ($Hkey,$T3); # if(carry) H^=0x1c2_polynomial + + # calculate H^2 + &movdqa ($Xi,$Hkey); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &reduction_alg9 ($Xhi,$Xi); + + &movdqu (&QWP(0,$Htbl),$Hkey); # save H + &movdqu (&QWP(16,$Htbl),$Xi); # save H^2 + + &ret (); +&function_end_B("gcm_init_clmul"); + +&function_begin_B("gcm_gmult_clmul"); + &mov ($Xip,&wparam(0)); + &mov ($Htbl,&wparam(1)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Xi,&QWP(0,$Xip)); + &movdqa ($T3,&QWP(0,$const)); + &movups ($Hkey,&QWP(0,$Htbl)); + &pshufb ($Xi,$T3); + + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &reduction_alg9 ($Xhi,$Xi); + + &pshufb ($Xi,$T3); + &movdqu (&QWP(0,$Xip),$Xi); + + &ret (); +&function_end_B("gcm_gmult_clmul"); + +&function_begin("gcm_ghash_clmul"); + &mov ($Xip,&wparam(0)); + &mov ($Htbl,&wparam(1)); + &mov ($inp,&wparam(2)); + &mov ($len,&wparam(3)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Xi,&QWP(0,$Xip)); + &movdqa ($T3,&QWP(0,$const)); + &movdqu ($Hkey,&QWP(0,$Htbl)); + &pshufb ($Xi,$T3); + + &sub ($len,0x10); + &jz (&label("odd_tail")); + + ####### + # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = + # [(H*Ii+1) + (H*Xi+1)] mod P = + # [(H*Ii+1) + H^2*(Ii+Xi)] mod P + # + &movdqu ($T1,&QWP(0,$inp)); # Ii + &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 + &pshufb ($T1,$T3); + &pshufb ($Xn,$T3); + &pxor ($Xi,$T1); # Ii+Xi + + &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 + &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 + + &lea ($inp,&DWP(32,$inp)); # i+=2 + &sub ($len,0x20); + &jbe (&label("even_tail")); + +&set_label("mod_loop"); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) + &movdqu ($T1,&QWP(0,$inp)); # Ii + &movups ($Hkey,&QWP(0,$Htbl)); # load H + + &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &pxor ($Xhi,$Xhn); + + &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 + &pshufb ($T1,$T3); + &pshufb ($Xn,$T3); + + &movdqa ($T3,$Xn); #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1 + &movdqa ($Xhn,$Xn); + &pxor ($Xhi,$T1); # "Ii+Xi", consume early + + &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase + &psllq ($Xi,1); + &pxor ($Xi,$T1); # + &psllq ($Xi,5); # + &pxor ($Xi,$T1); # + &pclmulqdq ($Xn,$Hkey,0x00); ####### + &psllq ($Xi,57); # + &movdqa ($T2,$Xi); # + &pslldq ($Xi,8); + &psrldq ($T2,8); # + &pxor ($Xi,$T1); + &pshufd ($T1,$T3,0b01001110); + &pxor ($Xhi,$T2); # + &pxor ($T1,$T3); + &pshufd ($T3,$Hkey,0b01001110); + &pxor ($T3,$Hkey); # + + &pclmulqdq ($Xhn,$Hkey,0x11); ####### + &movdqa ($T2,$Xi); # 2nd phase + &psrlq ($Xi,5); + &pxor ($Xi,$T2); # + &psrlq ($Xi,1); # + &pxor ($Xi,$T2); # + &pxor ($T2,$Xhi); + &psrlq ($Xi,1); # + &pxor ($Xi,$T2); # + + &pclmulqdq ($T1,$T3,0x00); ####### + &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 + &xorps ($T1,$Xn); # + &xorps ($T1,$Xhn); # + + &movdqa ($T3,$T1); # + &psrldq ($T1,8); + &pslldq ($T3,8); # + &pxor ($Xhn,$T1); + &pxor ($Xn,$T3); # + &movdqa ($T3,&QWP(0,$const)); + + &lea ($inp,&DWP(32,$inp)); + &sub ($len,0x20); + &ja (&label("mod_loop")); + +&set_label("even_tail"); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) + + &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &pxor ($Xhi,$Xhn); + + &reduction_alg9 ($Xhi,$Xi); + + &test ($len,$len); + &jnz (&label("done")); + + &movups ($Hkey,&QWP(0,$Htbl)); # load H +&set_label("odd_tail"); + &movdqu ($T1,&QWP(0,$inp)); # Ii + &pshufb ($T1,$T3); + &pxor ($Xi,$T1); # Ii+Xi + + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) + &reduction_alg9 ($Xhi,$Xi); + +&set_label("done"); + &pshufb ($Xi,$T3); + &movdqu (&QWP(0,$Xip),$Xi); +&function_end("gcm_ghash_clmul"); + +} else { # Algorith 5. Kept for reference purposes. + +sub reduction_alg5 { # 19/16 times faster than Intel version +my ($Xhi,$Xi)=@_; + + # <<1 + &movdqa ($T1,$Xi); # + &movdqa ($T2,$Xhi); + &pslld ($Xi,1); + &pslld ($Xhi,1); # + &psrld ($T1,31); + &psrld ($T2,31); # + &movdqa ($T3,$T1); + &pslldq ($T1,4); + &psrldq ($T3,12); # + &pslldq ($T2,4); + &por ($Xhi,$T3); # + &por ($Xi,$T1); + &por ($Xhi,$T2); # + + # 1st phase + &movdqa ($T1,$Xi); + &movdqa ($T2,$Xi); + &movdqa ($T3,$Xi); # + &pslld ($T1,31); + &pslld ($T2,30); + &pslld ($Xi,25); # + &pxor ($T1,$T2); + &pxor ($T1,$Xi); # + &movdqa ($T2,$T1); # + &pslldq ($T1,12); + &psrldq ($T2,4); # + &pxor ($T3,$T1); + + # 2nd phase + &pxor ($Xhi,$T3); # + &movdqa ($Xi,$T3); + &movdqa ($T1,$T3); + &psrld ($Xi,1); # + &psrld ($T1,2); + &psrld ($T3,7); # + &pxor ($Xi,$T1); + &pxor ($Xhi,$T2); + &pxor ($Xi,$T3); # + &pxor ($Xi,$Xhi); # +} + +&function_begin_B("gcm_init_clmul"); + &mov ($Htbl,&wparam(0)); + &mov ($Xip,&wparam(1)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Hkey,&QWP(0,$Xip)); + &pshufd ($Hkey,$Hkey,0b01001110);# dword swap + + # calculate H^2 + &movdqa ($Xi,$Hkey); + &clmul64x64_T3 ($Xhi,$Xi,$Hkey); + &reduction_alg5 ($Xhi,$Xi); + + &movdqu (&QWP(0,$Htbl),$Hkey); # save H + &movdqu (&QWP(16,$Htbl),$Xi); # save H^2 + + &ret (); +&function_end_B("gcm_init_clmul"); + +&function_begin_B("gcm_gmult_clmul"); + &mov ($Xip,&wparam(0)); + &mov ($Htbl,&wparam(1)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Xi,&QWP(0,$Xip)); + &movdqa ($Xn,&QWP(0,$const)); + &movdqu ($Hkey,&QWP(0,$Htbl)); + &pshufb ($Xi,$Xn); + + &clmul64x64_T3 ($Xhi,$Xi,$Hkey); + &reduction_alg5 ($Xhi,$Xi); + + &pshufb ($Xi,$Xn); + &movdqu (&QWP(0,$Xip),$Xi); + + &ret (); +&function_end_B("gcm_gmult_clmul"); + +&function_begin("gcm_ghash_clmul"); + &mov ($Xip,&wparam(0)); + &mov ($Htbl,&wparam(1)); + &mov ($inp,&wparam(2)); + &mov ($len,&wparam(3)); + + &call (&label("pic")); +&set_label("pic"); + &blindpop ($const); + &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + + &movdqu ($Xi,&QWP(0,$Xip)); + &movdqa ($T3,&QWP(0,$const)); + &movdqu ($Hkey,&QWP(0,$Htbl)); + &pshufb ($Xi,$T3); + + &sub ($len,0x10); + &jz (&label("odd_tail")); + + ####### + # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = + # [(H*Ii+1) + (H*Xi+1)] mod P = + # [(H*Ii+1) + H^2*(Ii+Xi)] mod P + # + &movdqu ($T1,&QWP(0,$inp)); # Ii + &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 + &pshufb ($T1,$T3); + &pshufb ($Xn,$T3); + &pxor ($Xi,$T1); # Ii+Xi + + &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1 + &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2 + + &sub ($len,0x20); + &lea ($inp,&DWP(32,$inp)); # i+=2 + &jbe (&label("even_tail")); + +&set_label("mod_loop"); + &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) + &movdqu ($Hkey,&QWP(0,$Htbl)); # load H + + &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &pxor ($Xhi,$Xhn); + + &reduction_alg5 ($Xhi,$Xi); + + ####### + &movdqa ($T3,&QWP(0,$const)); + &movdqu ($T1,&QWP(0,$inp)); # Ii + &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 + &pshufb ($T1,$T3); + &pshufb ($Xn,$T3); + &pxor ($Xi,$T1); # Ii+Xi + + &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1 + &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2 + + &sub ($len,0x20); + &lea ($inp,&DWP(32,$inp)); + &ja (&label("mod_loop")); + +&set_label("even_tail"); + &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) + + &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &pxor ($Xhi,$Xhn); + + &reduction_alg5 ($Xhi,$Xi); + + &movdqa ($T3,&QWP(0,$const)); + &test ($len,$len); + &jnz (&label("done")); + + &movdqu ($Hkey,&QWP(0,$Htbl)); # load H +&set_label("odd_tail"); + &movdqu ($T1,&QWP(0,$inp)); # Ii + &pshufb ($T1,$T3); + &pxor ($Xi,$T1); # Ii+Xi + + &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) + &reduction_alg5 ($Xhi,$Xi); + + &movdqa ($T3,&QWP(0,$const)); +&set_label("done"); + &pshufb ($Xi,$T3); + &movdqu (&QWP(0,$Xip),$Xi); +&function_end("gcm_ghash_clmul"); + +} + +&set_label("bswap",64); + &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); + &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial +}} # $sse2 + +&set_label("rem_4bit",64); + &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); + &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); + &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S); + &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S); +&set_label("rem_8bit",64); + &data_short(0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E); + &data_short(0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E); + &data_short(0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E); + &data_short(0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E); + &data_short(0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E); + &data_short(0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E); + &data_short(0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E); + &data_short(0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E); + &data_short(0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE); + &data_short(0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE); + &data_short(0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE); + &data_short(0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE); + &data_short(0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E); + &data_short(0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E); + &data_short(0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE); + &data_short(0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE); + &data_short(0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E); + &data_short(0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E); + &data_short(0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E); + &data_short(0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E); + &data_short(0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E); + &data_short(0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E); + &data_short(0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E); + &data_short(0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E); + &data_short(0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE); + &data_short(0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE); + &data_short(0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE); + &data_short(0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE); + &data_short(0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E); + &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); + &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); + &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); +}}} # !$x86only + +&asciz("GHASH for x86, CRYPTOGAMS by "); +&asm_finish(); + +# A question was risen about choice of vanilla MMX. Or rather why wasn't +# SSE2 chosen instead? In addition to the fact that MMX runs on legacy +# CPUs such as PIII, "4-bit" MMX version was observed to provide better +# performance than *corresponding* SSE2 one even on contemporary CPUs. +# SSE2 results were provided by Peter-Michael Hager. He maintains SSE2 +# implementation featuring full range of lookup-table sizes, but with +# per-invocation lookup table setup. Latter means that table size is +# chosen depending on how much data is to be hashed in every given call, +# more data - larger table. Best reported result for Core2 is ~4 cycles +# per processed byte out of 64KB block. This number accounts even for +# 64KB table setup overhead. As discussed in gcm128.c we choose to be +# more conservative in respect to lookup table sizes, but how do the +# results compare? Minimalistic "256B" MMX version delivers ~11 cycles +# on same platform. As also discussed in gcm128.c, next in line "8-bit +# Shoup's" or "4KB" method should deliver twice the performance of +# "256B" one, in other words not worse than ~6 cycles per byte. It +# should be also be noted that in SSE2 case improvement can be "super- +# linear," i.e. more than twice, mostly because >>8 maps to single +# instruction on SSE2 register. This is unlike "4-bit" case when >>4 +# maps to same amount of instructions in both MMX and SSE2 cases. +# Bottom line is that switch to SSE2 is considered to be justifiable +# only in case we choose to implement "8-bit" method... diff --git a/app/openssl/crypto/modes/asm/ghash-x86_64.S b/app/openssl/crypto/modes/asm/ghash-x86_64.S new file mode 100644 index 00000000..62d39c65 --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-x86_64.S @@ -0,0 +1,1026 @@ +.text + +.globl gcm_gmult_4bit +.type gcm_gmult_4bit,@function +.align 16 +gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp .Loop1 + +.align 16 +.Loop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.align 16 +.Lbreak1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +.Lgmult_epilogue: + .byte 0xf3,0xc3 +.size gcm_gmult_4bit,.-gcm_gmult_4bit +.globl gcm_ghash_4bit +.type gcm_ghash_4bit,@function +.align 16 +gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.align 16 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lghash_epilogue: + .byte 0xf3,0xc3 +.size gcm_ghash_4bit,.-gcm_ghash_4bit +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + movdqu %xmm2,(%rdi) + movdqu %xmm0,16(%rdi) + .byte 0xf3,0xc3 +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm5 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + + subq $16,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm8 + + + + + + movdqu (%rdx),%xmm3 + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + leaq 32(%rdx),%rdx + subq $32,%rcx + jbe .Leven_tail + +.Lmod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqu (%rdx),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 + pxor %xmm3,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + +.byte 102,15,58,68,250,17 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 + + leaq 32(%rdx),%rdx + subq $32,%rcx + ja .Lmod_loop + +.Leven_tail: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.Ldone: +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.LSEH_end_gcm_ghash_clmul: +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/app/openssl/crypto/modes/asm/ghash-x86_64.pl b/app/openssl/crypto/modes/asm/ghash-x86_64.pl new file mode 100644 index 00000000..38d779ed --- /dev/null +++ b/app/openssl/crypto/modes/asm/ghash-x86_64.pl @@ -0,0 +1,806 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# March, June 2010 +# +# The module implements "4-bit" GCM GHASH function and underlying +# single multiplication operation in GF(2^128). "4-bit" means that +# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH +# function features so called "528B" variant utilizing additional +# 256+16 bytes of per-key storage [+512 bytes shared table]. +# Performance results are for this streamed GHASH subroutine and are +# expressed in cycles per processed byte, less is better: +# +# gcc 3.4.x(*) assembler +# +# P4 28.6 14.0 +100% +# Opteron 19.3 7.7 +150% +# Core2 17.8 8.1(**) +120% +# +# (*) comparison is not completely fair, because C results are +# for vanilla "256B" implementation, while assembler results +# are for "528B";-) +# (**) it's mystery [to me] why Core2 result is not same as for +# Opteron; + +# May 2010 +# +# Add PCLMULQDQ version performing at 2.02 cycles per processed byte. +# See ghash-x86.pl for background information and details about coding +# techniques. +# +# Special thanks to David Woodhouse for +# providing access to a Westmere-based system on behalf of Intel +# Open Source Technology Centre. + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# common register layout +$nlo="%rax"; +$nhi="%rbx"; +$Zlo="%r8"; +$Zhi="%r9"; +$tmp="%r10"; +$rem_4bit = "%r11"; + +$Xi="%rdi"; +$Htbl="%rsi"; + +# per-function register layout +$cnt="%rcx"; +$rem="%rdx"; + +sub LB() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/ or + $r =~ s/%[er]([sd]i)/%\1l/ or + $r =~ s/%[er](bp)/%\1l/ or + $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +{ my $N; + sub loop() { + my $inp = shift; + + $N++; +$code.=<<___; + xor $nlo,$nlo + xor $nhi,$nhi + mov `&LB("$Zlo")`,`&LB("$nlo")` + mov `&LB("$Zlo")`,`&LB("$nhi")` + shl \$4,`&LB("$nlo")` + mov \$14,$cnt + mov 8($Htbl,$nlo),$Zlo + mov ($Htbl,$nlo),$Zhi + and \$0xf0,`&LB("$nhi")` + mov $Zlo,$rem + jmp .Loop$N + +.align 16 +.Loop$N: + shr \$4,$Zlo + and \$0xf,$rem + mov $Zhi,$tmp + mov ($inp,$cnt),`&LB("$nlo")` + shr \$4,$Zhi + xor 8($Htbl,$nhi),$Zlo + shl \$60,$tmp + xor ($Htbl,$nhi),$Zhi + mov `&LB("$nlo")`,`&LB("$nhi")` + xor ($rem_4bit,$rem,8),$Zhi + mov $Zlo,$rem + shl \$4,`&LB("$nlo")` + xor $tmp,$Zlo + dec $cnt + js .Lbreak$N + + shr \$4,$Zlo + and \$0xf,$rem + mov $Zhi,$tmp + shr \$4,$Zhi + xor 8($Htbl,$nlo),$Zlo + shl \$60,$tmp + xor ($Htbl,$nlo),$Zhi + and \$0xf0,`&LB("$nhi")` + xor ($rem_4bit,$rem,8),$Zhi + mov $Zlo,$rem + xor $tmp,$Zlo + jmp .Loop$N + +.align 16 +.Lbreak$N: + shr \$4,$Zlo + and \$0xf,$rem + mov $Zhi,$tmp + shr \$4,$Zhi + xor 8($Htbl,$nlo),$Zlo + shl \$60,$tmp + xor ($Htbl,$nlo),$Zhi + and \$0xf0,`&LB("$nhi")` + xor ($rem_4bit,$rem,8),$Zhi + mov $Zlo,$rem + xor $tmp,$Zlo + + shr \$4,$Zlo + and \$0xf,$rem + mov $Zhi,$tmp + shr \$4,$Zhi + xor 8($Htbl,$nhi),$Zlo + shl \$60,$tmp + xor ($Htbl,$nhi),$Zhi + xor $tmp,$Zlo + xor ($rem_4bit,$rem,8),$Zhi + + bswap $Zlo + bswap $Zhi +___ +}} + +$code=<<___; +.text + +.globl gcm_gmult_4bit +.type gcm_gmult_4bit,\@function,2 +.align 16 +gcm_gmult_4bit: + push %rbx + push %rbp # %rbp and %r12 are pushed exclusively in + push %r12 # order to reuse Win64 exception handler... +.Lgmult_prologue: + + movzb 15($Xi),$Zlo + lea .Lrem_4bit(%rip),$rem_4bit +___ + &loop ($Xi); +$code.=<<___; + mov $Zlo,8($Xi) + mov $Zhi,($Xi) + + mov 16(%rsp),%rbx + lea 24(%rsp),%rsp +.Lgmult_epilogue: + ret +.size gcm_gmult_4bit,.-gcm_gmult_4bit +___ + +# per-function register layout +$inp="%rdx"; +$len="%rcx"; +$rem_8bit=$rem_4bit; + +$code.=<<___; +.globl gcm_ghash_4bit +.type gcm_ghash_4bit,\@function,4 +.align 16 +gcm_ghash_4bit: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + sub \$280,%rsp +.Lghash_prologue: + mov $inp,%r14 # reassign couple of args + mov $len,%r15 +___ +{ my $inp="%r14"; + my $dat="%edx"; + my $len="%r15"; + my @nhi=("%ebx","%ecx"); + my @rem=("%r12","%r13"); + my $Hshr4="%rbp"; + + &sub ($Htbl,-128); # size optimization + &lea ($Hshr4,"16+128(%rsp)"); + { my @lo =($nlo,$nhi); + my @hi =($Zlo,$Zhi); + + &xor ($dat,$dat); + for ($i=0,$j=-2;$i<18;$i++,$j++) { + &mov ("$j(%rsp)",&LB($dat)) if ($i>1); + &or ($lo[0],$tmp) if ($i>1); + &mov (&LB($dat),&LB($lo[1])) if ($i>0 && $i<17); + &shr ($lo[1],4) if ($i>0 && $i<17); + &mov ($tmp,$hi[1]) if ($i>0 && $i<17); + &shr ($hi[1],4) if ($i>0 && $i<17); + &mov ("8*$j($Hshr4)",$hi[0]) if ($i>1); + &mov ($hi[0],"16*$i+0-128($Htbl)") if ($i<16); + &shl (&LB($dat),4) if ($i>0 && $i<17); + &mov ("8*$j-128($Hshr4)",$lo[0]) if ($i>1); + &mov ($lo[0],"16*$i+8-128($Htbl)") if ($i<16); + &shl ($tmp,60) if ($i>0 && $i<17); + + push (@lo,shift(@lo)); + push (@hi,shift(@hi)); + } + } + &add ($Htbl,-128); + &mov ($Zlo,"8($Xi)"); + &mov ($Zhi,"0($Xi)"); + &add ($len,$inp); # pointer to the end of data + &lea ($rem_8bit,".Lrem_8bit(%rip)"); + &jmp (".Louter_loop"); + +$code.=".align 16\n.Louter_loop:\n"; + &xor ($Zhi,"($inp)"); + &mov ("%rdx","8($inp)"); + &lea ($inp,"16($inp)"); + &xor ("%rdx",$Zlo); + &mov ("($Xi)",$Zhi); + &mov ("8($Xi)","%rdx"); + &shr ("%rdx",32); + + &xor ($nlo,$nlo); + &rol ($dat,8); + &mov (&LB($nlo),&LB($dat)); + &movz ($nhi[0],&LB($dat)); + &shl (&LB($nlo),4); + &shr ($nhi[0],4); + + for ($j=11,$i=0;$i<15;$i++) { + &rol ($dat,8); + &xor ($Zlo,"8($Htbl,$nlo)") if ($i>0); + &xor ($Zhi,"($Htbl,$nlo)") if ($i>0); + &mov ($Zlo,"8($Htbl,$nlo)") if ($i==0); + &mov ($Zhi,"($Htbl,$nlo)") if ($i==0); + + &mov (&LB($nlo),&LB($dat)); + &xor ($Zlo,$tmp) if ($i>0); + &movzw ($rem[1],"($rem_8bit,$rem[1],2)") if ($i>0); + + &movz ($nhi[1],&LB($dat)); + &shl (&LB($nlo),4); + &movzb ($rem[0],"(%rsp,$nhi[0])"); + + &shr ($nhi[1],4) if ($i<14); + &and ($nhi[1],0xf0) if ($i==14); + &shl ($rem[1],48) if ($i>0); + &xor ($rem[0],$Zlo); + + &mov ($tmp,$Zhi); + &xor ($Zhi,$rem[1]) if ($i>0); + &shr ($Zlo,8); + + &movz ($rem[0],&LB($rem[0])); + &mov ($dat,"$j($Xi)") if (--$j%4==0); + &shr ($Zhi,8); + + &xor ($Zlo,"-128($Hshr4,$nhi[0],8)"); + &shl ($tmp,56); + &xor ($Zhi,"($Hshr4,$nhi[0],8)"); + + unshift (@nhi,pop(@nhi)); # "rotate" registers + unshift (@rem,pop(@rem)); + } + &movzw ($rem[1],"($rem_8bit,$rem[1],2)"); + &xor ($Zlo,"8($Htbl,$nlo)"); + &xor ($Zhi,"($Htbl,$nlo)"); + + &shl ($rem[1],48); + &xor ($Zlo,$tmp); + + &xor ($Zhi,$rem[1]); + &movz ($rem[0],&LB($Zlo)); + &shr ($Zlo,4); + + &mov ($tmp,$Zhi); + &shl (&LB($rem[0]),4); + &shr ($Zhi,4); + + &xor ($Zlo,"8($Htbl,$nhi[0])"); + &movzw ($rem[0],"($rem_8bit,$rem[0],2)"); + &shl ($tmp,60); + + &xor ($Zhi,"($Htbl,$nhi[0])"); + &xor ($Zlo,$tmp); + &shl ($rem[0],48); + + &bswap ($Zlo); + &xor ($Zhi,$rem[0]); + + &bswap ($Zhi); + &cmp ($inp,$len); + &jb (".Louter_loop"); +} +$code.=<<___; + mov $Zlo,8($Xi) + mov $Zhi,($Xi) + + lea 280(%rsp),%rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lghash_epilogue: + ret +.size gcm_ghash_4bit,.-gcm_ghash_4bit +___ + +###################################################################### +# PCLMULQDQ version. + +@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order + +($Xi,$Xhi)=("%xmm0","%xmm1"); $Hkey="%xmm2"; +($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5"); + +sub clmul64x64_T2 { # minimal register pressure +my ($Xhi,$Xi,$Hkey,$modulo)=@_; + +$code.=<<___ if (!defined($modulo)); + movdqa $Xi,$Xhi # + pshufd \$0b01001110,$Xi,$T1 + pshufd \$0b01001110,$Hkey,$T2 + pxor $Xi,$T1 # + pxor $Hkey,$T2 +___ +$code.=<<___; + pclmulqdq \$0x00,$Hkey,$Xi ####### + pclmulqdq \$0x11,$Hkey,$Xhi ####### + pclmulqdq \$0x00,$T2,$T1 ####### + pxor $Xi,$T1 # + pxor $Xhi,$T1 # + + movdqa $T1,$T2 # + psrldq \$8,$T1 + pslldq \$8,$T2 # + pxor $T1,$Xhi + pxor $T2,$Xi # +___ +} + +sub reduction_alg9 { # 17/13 times faster than Intel version +my ($Xhi,$Xi) = @_; + +$code.=<<___; + # 1st phase + movdqa $Xi,$T1 # + psllq \$1,$Xi + pxor $T1,$Xi # + psllq \$5,$Xi # + pxor $T1,$Xi # + psllq \$57,$Xi # + movdqa $Xi,$T2 # + pslldq \$8,$Xi + psrldq \$8,$T2 # + pxor $T1,$Xi + pxor $T2,$Xhi # + + # 2nd phase + movdqa $Xi,$T2 + psrlq \$5,$Xi + pxor $T2,$Xi # + psrlq \$1,$Xi # + pxor $T2,$Xi # + pxor $Xhi,$T2 + psrlq \$1,$Xi # + pxor $T2,$Xi # +___ +} + +{ my ($Htbl,$Xip)=@_4args; + +$code.=<<___; +.globl gcm_init_clmul +.type gcm_init_clmul,\@abi-omnipotent +.align 16 +gcm_init_clmul: + movdqu ($Xip),$Hkey + pshufd \$0b01001110,$Hkey,$Hkey # dword swap + + # <<1 twist + pshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword + movdqa $Hkey,$T1 + psllq \$1,$Hkey + pxor $T3,$T3 # + psrlq \$63,$T1 + pcmpgtd $T2,$T3 # broadcast carry bit + pslldq \$8,$T1 + por $T1,$Hkey # H<<=1 + + # magic reduction + pand .L0x1c2_polynomial(%rip),$T3 + pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial + + # calculate H^2 + movdqa $Hkey,$Xi +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; + movdqu $Hkey,($Htbl) # save H + movdqu $Xi,16($Htbl) # save H^2 + ret +.size gcm_init_clmul,.-gcm_init_clmul +___ +} + +{ my ($Xip,$Htbl)=@_4args; + +$code.=<<___; +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,\@abi-omnipotent +.align 16 +gcm_gmult_clmul: + movdqu ($Xip),$Xi + movdqa .Lbswap_mask(%rip),$T3 + movdqu ($Htbl),$Hkey + pshufb $T3,$Xi +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; + pshufb $T3,$Xi + movdqu $Xi,($Xip) + ret +.size gcm_gmult_clmul,.-gcm_gmult_clmul +___ +} + +{ my ($Xip,$Htbl,$inp,$len)=@_4args; + my $Xn="%xmm6"; + my $Xhn="%xmm7"; + my $Hkey2="%xmm8"; + my $T1n="%xmm9"; + my $T2n="%xmm10"; + +$code.=<<___; +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,\@abi-omnipotent +.align 16 +gcm_ghash_clmul: +___ +$code.=<<___ if ($win64); +.LSEH_begin_gcm_ghash_clmul: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x83,0xec,0x58 #sub \$0x58,%rsp + .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) + .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) + .byte 0x44,0x0f,0x29,0x44,0x24,0x20 #movaps %xmm8,0x20(%rsp) + .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 #movaps %xmm9,0x30(%rsp) + .byte 0x44,0x0f,0x29,0x54,0x24,0x40 #movaps %xmm10,0x40(%rsp) +___ +$code.=<<___; + movdqa .Lbswap_mask(%rip),$T3 + + movdqu ($Xip),$Xi + movdqu ($Htbl),$Hkey + pshufb $T3,$Xi + + sub \$0x10,$len + jz .Lodd_tail + + movdqu 16($Htbl),$Hkey2 + ####### + # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = + # [(H*Ii+1) + (H*Xi+1)] mod P = + # [(H*Ii+1) + H^2*(Ii+Xi)] mod P + # + movdqu ($inp),$T1 # Ii + movdqu 16($inp),$Xn # Ii+1 + pshufb $T3,$T1 + pshufb $T3,$Xn + pxor $T1,$Xi # Ii+Xi +___ + &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 +$code.=<<___; + movdqa $Xi,$Xhi # + pshufd \$0b01001110,$Xi,$T1 + pshufd \$0b01001110,$Hkey2,$T2 + pxor $Xi,$T1 # + pxor $Hkey2,$T2 + + lea 32($inp),$inp # i+=2 + sub \$0x20,$len + jbe .Leven_tail + +.Lmod_loop: +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) +$code.=<<___; + movdqu ($inp),$T1 # Ii + pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) + pxor $Xhn,$Xhi + + movdqu 16($inp),$Xn # Ii+1 + pshufb $T3,$T1 + pshufb $T3,$Xn + + movdqa $Xn,$Xhn # + pshufd \$0b01001110,$Xn,$T1n + pshufd \$0b01001110,$Hkey,$T2n + pxor $Xn,$T1n # + pxor $Hkey,$T2n + pxor $T1,$Xhi # "Ii+Xi", consume early + + movdqa $Xi,$T1 # 1st phase + psllq \$1,$Xi + pxor $T1,$Xi # + psllq \$5,$Xi # + pxor $T1,$Xi # + pclmulqdq \$0x00,$Hkey,$Xn ####### + psllq \$57,$Xi # + movdqa $Xi,$T2 # + pslldq \$8,$Xi + psrldq \$8,$T2 # + pxor $T1,$Xi + pxor $T2,$Xhi # + + pclmulqdq \$0x11,$Hkey,$Xhn ####### + movdqa $Xi,$T2 # 2nd phase + psrlq \$5,$Xi + pxor $T2,$Xi # + psrlq \$1,$Xi # + pxor $T2,$Xi # + pxor $Xhi,$T2 + psrlq \$1,$Xi # + pxor $T2,$Xi # + + pclmulqdq \$0x00,$T2n,$T1n ####### + movdqa $Xi,$Xhi # + pshufd \$0b01001110,$Xi,$T1 + pshufd \$0b01001110,$Hkey2,$T2 + pxor $Xi,$T1 # + pxor $Hkey2,$T2 + + pxor $Xn,$T1n # + pxor $Xhn,$T1n # + movdqa $T1n,$T2n # + psrldq \$8,$T1n + pslldq \$8,$T2n # + pxor $T1n,$Xhn + pxor $T2n,$Xn # + + lea 32($inp),$inp + sub \$0x20,$len + ja .Lmod_loop + +.Leven_tail: +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) +$code.=<<___; + pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) + pxor $Xhn,$Xhi +___ + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; + test $len,$len + jnz .Ldone + +.Lodd_tail: + movdqu ($inp),$T1 # Ii + pshufb $T3,$T1 + pxor $T1,$Xi # Ii+Xi +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; +.Ldone: + pshufb $T3,$Xi + movdqu $Xi,($Xip) +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + movaps 0x40(%rsp),%xmm10 + add \$0x58,%rsp +___ +$code.=<<___; + ret +.LSEH_end_gcm_ghash_clmul: +.size gcm_ghash_clmul,.-gcm_ghash_clmul +___ +} + +$code.=<<___; +.align 64 +.Lbswap_mask: + .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: + .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.align 64 +.type .Lrem_4bit,\@object +.Lrem_4bit: + .long 0,`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16` + .long 0,`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16` + .long 0,`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16` + .long 0,`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16` +.type .Lrem_8bit,\@object +.Lrem_8bit: + .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E + .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E + .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E + .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E + .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E + .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E + .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E + .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E + .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE + .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE + .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE + .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE + .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E + .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E + .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE + .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE + .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E + .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E + .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E + .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E + .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E + .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E + .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E + .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E + .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE + .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE + .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE + .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE + .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E + .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E + .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE + .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.asciz "GHASH for x86_64, CRYPTOGAMS by " +.align 64 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + lea 24(%rax),%rax # adjust "rsp" + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$`1232/8`,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_gcm_gmult_4bit + .rva .LSEH_end_gcm_gmult_4bit + .rva .LSEH_info_gcm_gmult_4bit + + .rva .LSEH_begin_gcm_ghash_4bit + .rva .LSEH_end_gcm_ghash_4bit + .rva .LSEH_info_gcm_ghash_4bit + + .rva .LSEH_begin_gcm_ghash_clmul + .rva .LSEH_end_gcm_ghash_clmul + .rva .LSEH_info_gcm_ghash_clmul + +.section .xdata +.align 8 +.LSEH_info_gcm_gmult_4bit: + .byte 9,0,0,0 + .rva se_handler + .rva .Lgmult_prologue,.Lgmult_epilogue # HandlerData +.LSEH_info_gcm_ghash_4bit: + .byte 9,0,0,0 + .rva se_handler + .rva .Lghash_prologue,.Lghash_epilogue # HandlerData +.LSEH_info_gcm_ghash_clmul: + .byte 0x01,0x1f,0x0b,0x00 + .byte 0x1f,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10 + .byte 0x19,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9 + .byte 0x13,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8 + .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 + .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 + .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58 +___ +} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/modes/cbc128.c b/app/openssl/crypto/modes/cbc128.c index 8f8bd563..0e54f754 100644 --- a/app/openssl/crypto/modes/cbc128.c +++ b/app/openssl/crypto/modes/cbc128.c @@ -48,7 +48,8 @@ * */ -#include "modes.h" +#include +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,12 +59,7 @@ #endif #include -#define STRICT_ALIGNMENT 1 -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT +#ifndef STRICT_ALIGNMENT # define STRICT_ALIGNMENT 0 #endif @@ -121,7 +117,7 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], block128_f block) { size_t n; - union { size_t align; unsigned char c[16]; } tmp; + union { size_t t[16/sizeof(size_t)]; unsigned char c[16]; } tmp; assert(in && out && key && ivec); @@ -141,11 +137,13 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, out += 16; } } - else { + else if (16%sizeof(size_t) == 0) { /* always true */ while (len>=16) { + size_t *out_t=(size_t *)out, *iv_t=(size_t *)iv; + (*block)(in, out, key); - for(n=0; n<16; n+=sizeof(size_t)) - *(size_t *)(out+n) ^= *(size_t *)(iv+n); + for(n=0; n<16/sizeof(size_t); n++) + out_t[n] ^= iv_t[n]; iv = in; len -= 16; in += 16; @@ -169,15 +167,16 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, out += 16; } } - else { - size_t c; + else if (16%sizeof(size_t) == 0) { /* always true */ while (len>=16) { + size_t c, *out_t=(size_t *)out, *ivec_t=(size_t *)ivec; + const size_t *in_t=(const size_t *)in; + (*block)(in, tmp.c, key); - for(n=0; n<16; n+=sizeof(size_t)) { - c = *(size_t *)(in+n); - *(size_t *)(out+n) = - *(size_t *)(tmp.c+n) ^ *(size_t *)(ivec+n); - *(size_t *)(ivec+n) = c; + for(n=0; n<16/sizeof(size_t); n++) { + c = in_t[n]; + out_t[n] = tmp.t[n] ^ ivec_t[n]; + ivec_t[n] = c; } len -= 16; in += 16; diff --git a/app/openssl/crypto/modes/ccm128.c b/app/openssl/crypto/modes/ccm128.c new file mode 100644 index 00000000..3ce11d0d --- /dev/null +++ b/app/openssl/crypto/modes/ccm128.c @@ -0,0 +1,441 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif +#include + +/* First you setup M and L parameters and pass the key schedule. + * This is called once per session setup... */ +void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, + unsigned int M,unsigned int L,void *key,block128_f block) +{ + memset(ctx->nonce.c,0,sizeof(ctx->nonce.c)); + ctx->nonce.c[0] = ((u8)(L-1)&7) | (u8)(((M-2)/2)&7)<<3; + ctx->blocks = 0; + ctx->block = block; + ctx->key = key; +} + +/* !!! Following interfaces are to be called *once* per packet !!! */ + +/* Then you setup per-message nonce and pass the length of the message */ +int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, + const unsigned char *nonce,size_t nlen,size_t mlen) +{ + unsigned int L = ctx->nonce.c[0]&7; /* the L parameter */ + + if (nlen<(14-L)) return -1; /* nonce is too short */ + + if (sizeof(mlen)==8 && L>=3) { + ctx->nonce.c[8] = (u8)(mlen>>(56%(sizeof(mlen)*8))); + ctx->nonce.c[9] = (u8)(mlen>>(48%(sizeof(mlen)*8))); + ctx->nonce.c[10] = (u8)(mlen>>(40%(sizeof(mlen)*8))); + ctx->nonce.c[11] = (u8)(mlen>>(32%(sizeof(mlen)*8))); + } + else + ctx->nonce.u[1] = 0; + + ctx->nonce.c[12] = (u8)(mlen>>24); + ctx->nonce.c[13] = (u8)(mlen>>16); + ctx->nonce.c[14] = (u8)(mlen>>8); + ctx->nonce.c[15] = (u8)mlen; + + ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */ + memcpy(&ctx->nonce.c[1],nonce,14-L); + + return 0; +} + +/* Then you pass additional authentication data, this is optional */ +void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, + const unsigned char *aad,size_t alen) +{ unsigned int i; + block128_f block = ctx->block; + + if (alen==0) return; + + ctx->nonce.c[0] |= 0x40; /* set Adata flag */ + (*block)(ctx->nonce.c,ctx->cmac.c,ctx->key), + ctx->blocks++; + + if (alen<(0x10000-0x100)) { + ctx->cmac.c[0] ^= (u8)(alen>>8); + ctx->cmac.c[1] ^= (u8)alen; + i=2; + } + else if (sizeof(alen)==8 && alen>=(size_t)1<<(32%(sizeof(alen)*8))) { + ctx->cmac.c[0] ^= 0xFF; + ctx->cmac.c[1] ^= 0xFF; + ctx->cmac.c[2] ^= (u8)(alen>>(56%(sizeof(alen)*8))); + ctx->cmac.c[3] ^= (u8)(alen>>(48%(sizeof(alen)*8))); + ctx->cmac.c[4] ^= (u8)(alen>>(40%(sizeof(alen)*8))); + ctx->cmac.c[5] ^= (u8)(alen>>(32%(sizeof(alen)*8))); + ctx->cmac.c[6] ^= (u8)(alen>>24); + ctx->cmac.c[7] ^= (u8)(alen>>16); + ctx->cmac.c[8] ^= (u8)(alen>>8); + ctx->cmac.c[9] ^= (u8)alen; + i=10; + } + else { + ctx->cmac.c[0] ^= 0xFF; + ctx->cmac.c[1] ^= 0xFE; + ctx->cmac.c[2] ^= (u8)(alen>>24); + ctx->cmac.c[3] ^= (u8)(alen>>16); + ctx->cmac.c[4] ^= (u8)(alen>>8); + ctx->cmac.c[5] ^= (u8)alen; + i=6; + } + + do { + for(;i<16 && alen;++i,++aad,--alen) + ctx->cmac.c[i] ^= *aad; + (*block)(ctx->cmac.c,ctx->cmac.c,ctx->key), + ctx->blocks++; + i=0; + } while (alen); +} + +/* Finally you encrypt or decrypt the message */ + +/* counter part of nonce may not be larger than L*8 bits, + * L is not larger than 8, therefore 64-bit counter... */ +static void ctr64_inc(unsigned char *counter) { + unsigned int n=8; + u8 c; + + counter += 8; + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key), + ctx->blocks++; + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; /* length mismatch */ + + ctx->blocks += ((len+15)>>3)|1; + if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */ + + while (len>=16) { +#if defined(STRICT_ALIGNMENT) + union { u64 u[2]; u8 c[16]; } temp; + + memcpy (temp.c,inp,16); + ctx->cmac.u[0] ^= temp.u[0]; + ctx->cmac.u[1] ^= temp.u[1]; +#else + ctx->cmac.u[0] ^= ((u64*)inp)[0]; + ctx->cmac.u[1] ^= ((u64*)inp)[1]; +#endif + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + ctr64_inc(ctx->nonce.c); +#if defined(STRICT_ALIGNMENT) + temp.u[0] ^= scratch.u[0]; + temp.u[1] ^= scratch.u[1]; + memcpy(out,temp.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]; + ((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]; +#endif + inp += 16; + out += 16; + len -= 16; + } + + if (len) { + for (i=0; icmac.c[i] ^= inp[i]; + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; inonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key); + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; + + while (len>=16) { +#if defined(STRICT_ALIGNMENT) + union { u64 u[2]; u8 c[16]; } temp; +#endif + (*block)(ctx->nonce.c,scratch.c,key); + ctr64_inc(ctx->nonce.c); +#if defined(STRICT_ALIGNMENT) + memcpy (temp.c,inp,16); + ctx->cmac.u[0] ^= (scratch.u[0] ^= temp.u[0]); + ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); + memcpy (out,scratch.c,16); +#else + ctx->cmac.u[0] ^= (((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]); + ctx->cmac.u[1] ^= (((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]); +#endif + (*block)(ctx->cmac.c,ctx->cmac.c,key); + + inp += 16; + out += 16; + len -= 16; + } + + if (len) { + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; icmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]); + (*block)(ctx->cmac.c,ctx->cmac.c,key); + } + + for (i=15-L;i<16;++i) + ctx->nonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +static void ctr64_add (unsigned char *counter,size_t inc) +{ size_t n=8, val=0; + + counter += 8; + do { + --n; + val += counter[n] + (inc&0xff); + counter[n] = (unsigned char)val; + val >>= 8; /* carry bit */ + inc >>= 8; + } while(n && (inc || val)); +} + +int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len,ccm128_f stream) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key), + ctx->blocks++; + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; /* length mismatch */ + + ctx->blocks += ((len+15)>>3)|1; + if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */ + + if ((n=len/16)) { + (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c); + n *= 16; + inp += n; + out += n; + len -= n; + if (len) ctr64_add(ctx->nonce.c,n/16); + } + + if (len) { + for (i=0; icmac.c[i] ^= inp[i]; + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; inonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len,ccm128_f stream) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key); + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; + + if ((n=len/16)) { + (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c); + n *= 16; + inp += n; + out += n; + len -= n; + if (len) ctr64_add(ctx->nonce.c,n/16); + } + + if (len) { + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; icmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]); + (*block)(ctx->cmac.c,ctx->cmac.c,key); + } + + for (i=15-L;i<16;++i) + ctx->nonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx,unsigned char *tag,size_t len) +{ unsigned int M = (ctx->nonce.c[0]>>3)&7; /* the M parameter */ + + M *= 2; M += 2; + if (lencmac.c,M); + return M; +} diff --git a/app/openssl/crypto/modes/cfb128.c b/app/openssl/crypto/modes/cfb128.c index e5938c61..4e6f5d35 100644 --- a/app/openssl/crypto/modes/cfb128.c +++ b/app/openssl/crypto/modes/cfb128.c @@ -48,7 +48,8 @@ * */ -#include "modes.h" +#include +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,14 +59,6 @@ #endif #include -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* The input and output encrypted as though 128bit cfb mode is being * used. The extra state information to record how much of the * 128bit block we have used is contained in *num; diff --git a/app/openssl/crypto/modes/ctr128.c b/app/openssl/crypto/modes/ctr128.c index 932037f5..ee642c58 100644 --- a/app/openssl/crypto/modes/ctr128.c +++ b/app/openssl/crypto/modes/ctr128.c @@ -48,7 +48,8 @@ * */ -#include "modes.h" +#include +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,17 +59,6 @@ #endif #include -typedef unsigned int u32; -typedef unsigned char u8; - -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* NOTE: the IV/counter CTR mode is big-endian. The code itself * is endian-neutral. */ @@ -182,3 +172,81 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, *num=n; } + +/* increment upper 96 bits of 128-bit counter by 1 */ +static void ctr96_inc(unsigned char *counter) { + u32 n=12; + u8 c; + + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], unsigned char ecount_buf[16], + unsigned int *num, ctr128_f func) +{ + unsigned int n,ctr32; + + assert(in && out && key && ecount_buf && num); + assert(*num < 16); + + n = *num; + + while (n && len) { + *(out++) = *(in++) ^ ecount_buf[n]; + --len; + n = (n+1) % 16; + } + + ctr32 = GETU32(ivec+12); + while (len>=16) { + size_t blocks = len/16; + /* + * 1<<28 is just a not-so-small yet not-so-large number... + * Below condition is practically never met, but it has to + * be checked for code correctness. + */ + if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28)) + blocks = (1U<<28); + /* + * As (*func) operates on 32-bit counter, caller + * has to handle overflow. 'if' below detects the + * overflow, which is then handled by limiting the + * amount of blocks to the exact overflow point... + */ + ctr32 += (u32)blocks; + if (ctr32 < blocks) { + blocks -= ctr32; + ctr32 = 0; + } + (*func)(in,out,blocks,key,ivec); + /* (*ctr) does not update ivec, caller does: */ + PUTU32(ivec+12,ctr32); + /* ... overflow was detected, propogate carry. */ + if (ctr32 == 0) ctr96_inc(ivec); + blocks *= 16; + len -= blocks; + out += blocks; + in += blocks; + } + if (len) { + memset(ecount_buf,0,16); + (*func)(ecount_buf,ecount_buf,1,key,ivec); + ++ctr32; + PUTU32(ivec+12,ctr32); + if (ctr32 == 0) ctr96_inc(ivec); + while (len--) { + out[n] = in[n] ^ ecount_buf[n]; + ++n; + } + } + + *num=n; +} diff --git a/app/openssl/crypto/modes/gcm128.c b/app/openssl/crypto/modes/gcm128.c new file mode 100644 index 00000000..e1dc2b0f --- /dev/null +++ b/app/openssl/crypto/modes/gcm128.c @@ -0,0 +1,1905 @@ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#define OPENSSL_FIPSAPI + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif +#include + +#if defined(BSWAP4) && defined(STRICT_ALIGNMENT) +/* redefine, because alignment is ensured */ +#undef GETU32 +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#undef PUTU32 +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +#endif + +#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) +#define REDUCE1BIT(V) do { \ + if (sizeof(size_t)==8) { \ + u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^T; \ + } \ + else { \ + u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^((u64)T<<32); \ + } \ +} while(0) + +/* + * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should + * never be set to 8. 8 is effectively reserved for testing purposes. + * TABLE_BITS>1 are lookup-table-driven implementations referred to as + * "Shoup's" in GCM specification. In other words OpenSSL does not cover + * whole spectrum of possible table driven implementations. Why? In + * non-"Shoup's" case memory access pattern is segmented in such manner, + * that it's trivial to see that cache timing information can reveal + * fair portion of intermediate hash value. Given that ciphertext is + * always available to attacker, it's possible for him to attempt to + * deduce secret parameter H and if successful, tamper with messages + * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's + * not as trivial, but there is no reason to believe that it's resistant + * to cache-timing attack. And the thing about "8-bit" implementation is + * that it consumes 16 (sixteen) times more memory, 4KB per individual + * key + 1KB shared. Well, on pros side it should be twice as fast as + * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version + * was observed to run ~75% faster, closer to 100% for commercial + * compilers... Yet "4-bit" procedure is preferred, because it's + * believed to provide better security-performance balance and adequate + * all-round performance. "All-round" refers to things like: + * + * - shorter setup time effectively improves overall timing for + * handling short messages; + * - larger table allocation can become unbearable because of VM + * subsystem penalties (for example on Windows large enough free + * results in VM working set trimming, meaning that consequent + * malloc would immediately incur working set expansion); + * - larger table has larger cache footprint, which can affect + * performance of other code paths (not necessarily even from same + * thread in Hyper-Threading world); + * + * Value of 1 is not appropriate for performance reasons. + */ +#if TABLE_BITS==8 + +static void gcm_init_8bit(u128 Htable[256], u64 H[2]) +{ + int i, j; + u128 V; + + Htable[0].hi = 0; + Htable[0].lo = 0; + V.hi = H[0]; + V.lo = H[1]; + + for (Htable[128]=V, i=64; i>0; i>>=1) { + REDUCE1BIT(V); + Htable[i] = V; + } + + for (i=2; i<256; i<<=1) { + u128 *Hi = Htable+i, H0 = *Hi; + for (j=1; j>8); + Z.hi = (Z.hi>>8); + if (sizeof(size_t)==8) + Z.hi ^= rem_8bit[rem]; + else + Z.hi ^= (u64)rem_8bit[rem]<<32; + } + + if (is_endian.little) { +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif + } + else { + Xi[0] = Z.hi; + Xi[1] = Z.lo; + } +} +#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) + +#elif TABLE_BITS==4 + +static void gcm_init_4bit(u128 Htable[16], u64 H[2]) +{ + u128 V; +#if defined(OPENSSL_SMALL_FOOTPRINT) + int i; +#endif + + Htable[0].hi = 0; + Htable[0].lo = 0; + V.hi = H[0]; + V.lo = H[1]; + +#if defined(OPENSSL_SMALL_FOOTPRINT) + for (Htable[8]=V, i=4; i>0; i>>=1) { + REDUCE1BIT(V); + Htable[i] = V; + } + + for (i=2; i<16; i<<=1) { + u128 *Hi = Htable+i; + int j; + for (V=*Hi, j=1; j>32; + Htable[j].lo = V.hi<<32|V.hi>>32; + } + } +#endif +} + +#ifndef GHASH_ASM +static const size_t rem_4bit[16] = { + PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), + PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), + PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), + PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; + +static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) +{ + u128 Z; + int cnt = 15; + size_t rem, nlo, nhi; + const union { long one; char little; } is_endian = {1}; + + nlo = ((const u8 *)Xi)[15]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi = Htable[nlo].hi; + Z.lo = Htable[nlo].lo; + + while (1) { + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + if (sizeof(size_t)==8) + Z.hi ^= rem_4bit[rem]; + else + Z.hi ^= (u64)rem_4bit[rem]<<32; + + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + + if (--cnt<0) break; + + nlo = ((const u8 *)Xi)[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + if (sizeof(size_t)==8) + Z.hi ^= rem_4bit[rem]; + else + Z.hi ^= (u64)rem_4bit[rem]<<32; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + } + + if (is_endian.little) { +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif + } + else { + Xi[0] = Z.hi; + Xi[1] = Z.lo; + } +} + +#if !defined(OPENSSL_SMALL_FOOTPRINT) +/* + * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for + * details... Compiler-generated code doesn't seem to give any + * performance improvement, at least not on x86[_64]. It's here + * mostly as reference and a placeholder for possible future + * non-trivial optimization[s]... + */ +static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) +{ + u128 Z; + int cnt; + size_t rem, nlo, nhi; + const union { long one; char little; } is_endian = {1}; + +#if 1 + do { + cnt = 15; + nlo = ((const u8 *)Xi)[15]; + nlo ^= inp[15]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi = Htable[nlo].hi; + Z.lo = Htable[nlo].lo; + + while (1) { + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + if (sizeof(size_t)==8) + Z.hi ^= rem_4bit[rem]; + else + Z.hi ^= (u64)rem_4bit[rem]<<32; + + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + + if (--cnt<0) break; + + nlo = ((const u8 *)Xi)[cnt]; + nlo ^= inp[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + if (sizeof(size_t)==8) + Z.hi ^= rem_4bit[rem]; + else + Z.hi ^= (u64)rem_4bit[rem]<<32; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + } +#else + /* + * Extra 256+16 bytes per-key plus 512 bytes shared tables + * [should] give ~50% improvement... One could have PACK()-ed + * the rem_8bit even here, but the priority is to minimize + * cache footprint... + */ + u128 Hshr4[16]; /* Htable shifted right by 4 bits */ + u8 Hshl4[16]; /* Htable shifted left by 4 bits */ + static const unsigned short rem_8bit[256] = { + 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, + 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, + 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, + 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, + 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, + 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, + 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, + 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, + 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, + 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, + 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, + 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, + 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, + 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, + 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, + 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, + 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, + 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, + 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, + 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, + 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, + 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, + 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, + 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, + 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, + 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, + 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, + 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, + 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, + 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, + 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, + 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE }; + /* + * This pre-processing phase slows down procedure by approximately + * same time as it makes each loop spin faster. In other words + * single block performance is approximately same as straightforward + * "4-bit" implementation, and then it goes only faster... + */ + for (cnt=0; cnt<16; ++cnt) { + Z.hi = Htable[cnt].hi; + Z.lo = Htable[cnt].lo; + Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4); + Hshr4[cnt].hi = (Z.hi>>4); + Hshl4[cnt] = (u8)(Z.lo<<4); + } + + do { + for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) { + nlo = ((const u8 *)Xi)[cnt]; + nlo ^= inp[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + + rem = (size_t)Z.lo&0xff; + + Z.lo = (Z.hi<<56)|(Z.lo>>8); + Z.hi = (Z.hi>>8); + + Z.hi ^= Hshr4[nhi].hi; + Z.lo ^= Hshr4[nhi].lo; + Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48; + } + + nlo = ((const u8 *)Xi)[0]; + nlo ^= inp[0]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + + rem = (size_t)Z.lo&0xf; + + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + Z.hi ^= ((u64)rem_8bit[rem<<4])<<48; +#endif + + if (is_endian.little) { +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif + } + else { + Xi[0] = Z.hi; + Xi[1] = Z.lo; + } + } while (inp+=16, len-=16); +} +#endif +#else +void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +#endif + +#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) +#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) +#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) +/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache + * trashing effect. In other words idea is to hash data while it's + * still in L1 cache after encryption pass... */ +#define GHASH_CHUNK (3*1024) +#endif + +#else /* TABLE_BITS */ + +static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) +{ + u128 V,Z = { 0,0 }; + long X; + int i,j; + const long *xi = (const long *)Xi; + const union { long one; char little; } is_endian = {1}; + + V.hi = H[0]; /* H is in host byte order, no byte swapping */ + V.lo = H[1]; + + for (j=0; j<16/sizeof(long); ++j) { + if (is_endian.little) { + if (sizeof(long)==8) { +#ifdef BSWAP8 + X = (long)(BSWAP8(xi[j])); +#else + const u8 *p = (const u8 *)(xi+j); + X = (long)((u64)GETU32(p)<<32|GETU32(p+4)); +#endif + } + else { + const u8 *p = (const u8 *)(xi+j); + X = (long)GETU32(p); + } + } + else + X = xi[j]; + + for (i=0; i<8*sizeof(long); ++i, X<<=1) { + u64 M = (u64)(X>>(8*sizeof(long)-1)); + Z.hi ^= V.hi&M; + Z.lo ^= V.lo&M; + + REDUCE1BIT(V); + } + } + + if (is_endian.little) { +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif + } + else { + Xi[0] = Z.hi; + Xi[1] = Z.lo; + } +} +#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) + +#endif + +#if TABLE_BITS==4 && defined(GHASH_ASM) +# if !defined(I386_ONLY) && \ + (defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) +# define GHASH_ASM_X86_OR_64 +# define GCM_FUNCREF_4BIT +extern unsigned int OPENSSL_ia32cap_P[2]; + +void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); +void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + +# if defined(__i386) || defined(__i386__) || defined(_M_IX86) +# define GHASH_ASM_X86 +void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + +void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# endif +# elif defined(__arm__) || defined(__arm) +# include "arm_arch.h" +# if __ARM_ARCH__>=7 +# define GHASH_ASM_ARM +# define GCM_FUNCREF_4BIT +void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# endif +# endif +#endif + +#ifdef GCM_FUNCREF_4BIT +# undef GCM_MUL +# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) +# ifdef GHASH +# undef GHASH +# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) +# endif +#endif + +void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) +{ + const union { long one; char little; } is_endian = {1}; + + memset(ctx,0,sizeof(*ctx)); + ctx->block = block; + ctx->key = key; + + (*block)(ctx->H.c,ctx->H.c,key); + + if (is_endian.little) { + /* H is stored in host byte order */ +#ifdef BSWAP8 + ctx->H.u[0] = BSWAP8(ctx->H.u[0]); + ctx->H.u[1] = BSWAP8(ctx->H.u[1]); +#else + u8 *p = ctx->H.c; + u64 hi,lo; + hi = (u64)GETU32(p) <<32|GETU32(p+4); + lo = (u64)GETU32(p+8)<<32|GETU32(p+12); + ctx->H.u[0] = hi; + ctx->H.u[1] = lo; +#endif + } + +#if TABLE_BITS==8 + gcm_init_8bit(ctx->Htable,ctx->H.u); +#elif TABLE_BITS==4 +# if defined(GHASH_ASM_X86_OR_64) +# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) + if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ + OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ + gcm_init_clmul(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_clmul; + ctx->ghash = gcm_ghash_clmul; + return; + } +# endif + gcm_init_4bit(ctx->Htable,ctx->H.u); +# if defined(GHASH_ASM_X86) /* x86 only */ +# if defined(OPENSSL_IA32_SSE2) + if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */ +# else + if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */ +# endif + ctx->gmult = gcm_gmult_4bit_mmx; + ctx->ghash = gcm_ghash_4bit_mmx; + } else { + ctx->gmult = gcm_gmult_4bit_x86; + ctx->ghash = gcm_ghash_4bit_x86; + } +# else + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; +# endif +# elif defined(GHASH_ASM_ARM) + if (OPENSSL_armcap_P & ARMV7_NEON) { + ctx->gmult = gcm_gmult_neon; + ctx->ghash = gcm_ghash_neon; + } else { + gcm_init_4bit(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } +# else + gcm_init_4bit(ctx->Htable,ctx->H.u); +# endif +#endif +} + +void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) +{ + const union { long one; char little; } is_endian = {1}; + unsigned int ctr; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +#endif + + ctx->Yi.u[0] = 0; + ctx->Yi.u[1] = 0; + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + ctx->len.u[0] = 0; /* AAD length */ + ctx->len.u[1] = 0; /* message length */ + ctx->ares = 0; + ctx->mres = 0; + + if (len==12) { + memcpy(ctx->Yi.c,iv,12); + ctx->Yi.c[15]=1; + ctr=1; + } + else { + size_t i; + u64 len0 = len; + + while (len>=16) { + for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i]; + GCM_MUL(ctx,Yi); + iv += 16; + len -= 16; + } + if (len) { + for (i=0; iYi.c[i] ^= iv[i]; + GCM_MUL(ctx,Yi); + } + len0 <<= 3; + if (is_endian.little) { +#ifdef BSWAP8 + ctx->Yi.u[1] ^= BSWAP8(len0); +#else + ctx->Yi.c[8] ^= (u8)(len0>>56); + ctx->Yi.c[9] ^= (u8)(len0>>48); + ctx->Yi.c[10] ^= (u8)(len0>>40); + ctx->Yi.c[11] ^= (u8)(len0>>32); + ctx->Yi.c[12] ^= (u8)(len0>>24); + ctx->Yi.c[13] ^= (u8)(len0>>16); + ctx->Yi.c[14] ^= (u8)(len0>>8); + ctx->Yi.c[15] ^= (u8)(len0); +#endif + } + else + ctx->Yi.u[1] ^= len0; + + GCM_MUL(ctx,Yi); + + if (is_endian.little) +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif + else + ctr = ctx->Yi.d[3]; + } + + (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; +} + +int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) +{ + size_t i; + unsigned int n; + u64 alen = ctx->len.u[0]; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + if (ctx->len.u[1]) return -2; + + alen += len; + if (alen>(U64(1)<<61) || (sizeof(len)==8 && alenlen.u[0] = alen; + + n = ctx->ares; + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(aad++); + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->ares = n; + return 0; + } + } + +#ifdef GHASH + if ((i = (len&(size_t)-16))) { + GHASH(ctx,aad,i); + aad += i; + len -= i; + } +#else + while (len>=16) { + for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i]; + GCM_MUL(ctx,Xi); + aad += 16; + len -= 16; + } +#endif + if (len) { + n = (unsigned int)len; + for (i=0; iXi.c[i] ^= aad[i]; + } + + ctx->ares = n; + return 0; +} + +int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + const union { long one; char little; } is_endian = {1}; + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + block128_f block = ctx->block; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + +#if 0 + n = (unsigned int)mlen%16; /* alternative to ctx->mres */ +#endif + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to encrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + + if (is_endian.little) +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif + else + ctr = ctx->Yi.d[3]; + + n = ctx->mres; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(STRICT_ALIGNMENT) + if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) + break; +#endif +#if defined(GHASH) && defined(GHASH_CHUNK) + while (len>=GHASH_CHUNK) { + size_t j=GHASH_CHUNK; + + while (j) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i] ^ ctx->EKi.t[i]; + out += 16; + in += 16; + j -= 16; + } + GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK); + len -= GHASH_CHUNK; + } + if ((i = (len&(size_t)-16))) { + size_t j=i; + + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i] ^ ctx->EKi.t[i]; + out += 16; + in += 16; + len -= 16; + } + GHASH(ctx,out-j,j); + } +#else + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) + ctx->Xi.t[i] ^= + out_t[i] = in_t[i]^ctx->EKi.t[i]; + GCM_MUL(ctx,Xi); + out += 16; + in += 16; + len -= 16; + } +#endif + if (len) { + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + while (len--) { + ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; + } while(0); +#endif + for (i=0;iYi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + } + ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n]; + n = (n+1)%16; + if (n==0) + GCM_MUL(ctx,Xi); + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + const union { long one; char little; } is_endian = {1}; + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + block128_f block = ctx->block; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to decrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + + if (is_endian.little) +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif + else + ctr = ctx->Yi.d[3]; + + n = ctx->mres; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + if (n) { + while (n && len) { + u8 c = *(in++); + *(out++) = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL (ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(STRICT_ALIGNMENT) + if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) + break; +#endif +#if defined(GHASH) && defined(GHASH_CHUNK) + while (len>=GHASH_CHUNK) { + size_t j=GHASH_CHUNK; + + GHASH(ctx,in,GHASH_CHUNK); + while (j) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i]^ctx->EKi.t[i]; + out += 16; + in += 16; + j -= 16; + } + len -= GHASH_CHUNK; + } + if ((i = (len&(size_t)-16))) { + GHASH(ctx,in,i); + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i]^ctx->EKi.t[i]; + out += 16; + in += 16; + len -= 16; + } + } +#else + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + for (i=0; i<16/sizeof(size_t); ++i) { + size_t c = in[i]; + out[i] = c^ctx->EKi.t[i]; + ctx->Xi.t[i] ^= c; + } + GCM_MUL(ctx,Xi); + out += 16; + in += 16; + len -= 16; + } +#endif + if (len) { + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + while (len--) { + u8 c = in[n]; + ctx->Xi.c[n] ^= c; + out[n] = c^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; + } while(0); +#endif + for (i=0;iYi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + } + c = in[i]; + out[i] = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + n = (n+1)%16; + if (n==0) + GCM_MUL(ctx,Xi); + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len, ctr128_f stream) +{ + const union { long one; char little; } is_endian = {1}; + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to encrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + + if (is_endian.little) +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif + else + ctr = ctx->Yi.d[3]; + + n = ctx->mres; + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + while (len>=GHASH_CHUNK) { + (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); + ctr += GHASH_CHUNK/16; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + GHASH(ctx,out,GHASH_CHUNK); + out += GHASH_CHUNK; + in += GHASH_CHUNK; + len -= GHASH_CHUNK; + } +#endif + if ((i = (len&(size_t)-16))) { + size_t j=i/16; + + (*stream)(in,out,j,key,ctx->Yi.c); + ctr += (unsigned int)j; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + in += i; + len -= i; +#if defined(GHASH) + GHASH(ctx,out,i); + out += i; +#else + while (j--) { + for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i]; + GCM_MUL(ctx,Xi); + out += 16; + } +#endif + } + if (len) { + (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + while (len--) { + ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len,ctr128_f stream) +{ + const union { long one; char little; } is_endian = {1}; + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to decrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + + if (is_endian.little) +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif + else + ctr = ctx->Yi.d[3]; + + n = ctx->mres; + if (n) { + while (n && len) { + u8 c = *(in++); + *(out++) = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL (ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + while (len>=GHASH_CHUNK) { + GHASH(ctx,in,GHASH_CHUNK); + (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); + ctr += GHASH_CHUNK/16; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + out += GHASH_CHUNK; + in += GHASH_CHUNK; + len -= GHASH_CHUNK; + } +#endif + if ((i = (len&(size_t)-16))) { + size_t j=i/16; + +#if defined(GHASH) + GHASH(ctx,in,i); +#else + while (j--) { + size_t k; + for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k]; + GCM_MUL(ctx,Xi); + in += 16; + } + j = i/16; + in -= i; +#endif + (*stream)(in,out,j,key,ctx->Yi.c); + ctr += (unsigned int)j; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + out += i; + in += i; + len -= i; + } + if (len) { + (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; + if (is_endian.little) +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif + else + ctx->Yi.d[3] = ctr; + while (len--) { + u8 c = in[n]; + ctx->Xi.c[n] ^= c; + out[n] = c^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, + size_t len) +{ + const union { long one; char little; } is_endian = {1}; + u64 alen = ctx->len.u[0]<<3; + u64 clen = ctx->len.u[1]<<3; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +#endif + + if (ctx->mres || ctx->ares) + GCM_MUL(ctx,Xi); + + if (is_endian.little) { +#ifdef BSWAP8 + alen = BSWAP8(alen); + clen = BSWAP8(clen); +#else + u8 *p = ctx->len.c; + + ctx->len.u[0] = alen; + ctx->len.u[1] = clen; + + alen = (u64)GETU32(p) <<32|GETU32(p+4); + clen = (u64)GETU32(p+8)<<32|GETU32(p+12); +#endif + } + + ctx->Xi.u[0] ^= alen; + ctx->Xi.u[1] ^= clen; + GCM_MUL(ctx,Xi); + + ctx->Xi.u[0] ^= ctx->EK0.u[0]; + ctx->Xi.u[1] ^= ctx->EK0.u[1]; + + if (tag && len<=sizeof(ctx->Xi)) + return memcmp(ctx->Xi.c,tag,len); + else + return -1; +} + +void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) +{ + CRYPTO_gcm128_finish(ctx, NULL, 0); + memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c)); +} + +GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) +{ + GCM128_CONTEXT *ret; + + if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT)))) + CRYPTO_gcm128_init(ret,key,block); + + return ret; +} + +void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) +{ + if (ctx) { + OPENSSL_cleanse(ctx,sizeof(*ctx)); + OPENSSL_free(ctx); + } +} + +#if defined(SELFTEST) +#include +#include + +/* Test Case 1 */ +static const u8 K1[16], + *P1=NULL, + *A1=NULL, + IV1[12], + *C1=NULL, + T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a}; + +/* Test Case 2 */ +#define K2 K1 +#define A2 A1 +#define IV2 IV1 +static const u8 P2[16], + C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78}, + T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf}; + +/* Test Case 3 */ +#define A3 A2 +static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08}, + P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, + IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, + C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85}, + T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4}; + +/* Test Case 4 */ +#define K4 K3 +#define IV4 IV3 +static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, + A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2}, + C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91}, + T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47}; + +/* Test Case 5 */ +#define K5 K4 +#define P5 P4 +#define A5 A4 +static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, + C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55, + 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23, + 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42, + 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98}, + T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb}; + +/* Test Case 6 */ +#define K6 K5 +#define P6 P5 +#define A6 A5 +static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, + 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, + 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, + 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, + C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94, + 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7, + 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f, + 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5}, + T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50}; + +/* Test Case 7 */ +static const u8 K7[24], + *P7=NULL, + *A7=NULL, + IV7[12], + *C7=NULL, + T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35}; + +/* Test Case 8 */ +#define K8 K7 +#define IV8 IV7 +#define A8 A7 +static const u8 P8[16], + C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00}, + T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb}; + +/* Test Case 9 */ +#define A9 A8 +static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08, + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c}, + P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, + IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, + C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57, + 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c, + 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47, + 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56}, + T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14}; + +/* Test Case 10 */ +#define K10 K9 +#define IV10 IV9 +static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, + A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2}, + C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57, + 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c, + 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47, + 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10}, + T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c}; + +/* Test Case 11 */ +#define K11 K10 +#define P11 P10 +#define A11 A10 +static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, + C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8, + 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57, + 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9, + 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7}, + T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8}; + +/* Test Case 12 */ +#define K12 K11 +#define P12 P11 +#define A12 A11 +static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, + 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, + 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, + 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, + C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff, + 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45, + 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3, + 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b}, + T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9}; + +/* Test Case 13 */ +static const u8 K13[32], + *P13=NULL, + *A13=NULL, + IV13[12], + *C13=NULL, + T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b}; + +/* Test Case 14 */ +#define K14 K13 +#define A14 A13 +static const u8 P14[16], + IV14[12], + C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18}, + T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19}; + +/* Test Case 15 */ +#define A15 A14 +static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08, + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08}, + P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, + IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, + C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d, + 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa, + 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38, + 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad}, + T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c}; + +/* Test Case 16 */ +#define K16 K15 +#define IV16 IV15 +static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, + A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2}, + C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d, + 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa, + 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38, + 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62}, + T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b}; + +/* Test Case 17 */ +#define K17 K16 +#define P17 P16 +#define A17 A16 +static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, + C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb, + 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0, + 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78, + 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f}, + T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2}; + +/* Test Case 18 */ +#define K18 K17 +#define P18 P17 +#define A18 A17 +static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, + 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, + 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, + 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, + C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20, + 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4, + 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde, + 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f}, + T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a}; + +/* Test Case 19 */ +#define K19 K1 +#define P19 P1 +#define IV19 IV1 +#define C19 C1 +static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55, + 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d, + 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa, + 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38, + 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad}, + T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92}; + +/* Test Case 20 */ +#define K20 K1 +#define A20 A1 +static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */ + P20[288], + C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14, + 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f, + 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18, + 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49, + 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c, + 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29, + 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76, + 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce, + 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86, + 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18, + 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42, + 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06, + 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c, + 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64, + 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6, + 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74, + 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46, + 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c}, + T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f}; + +#define TEST_CASE(n) do { \ + u8 out[sizeof(P##n)]; \ + AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \ + CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \ + CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ + memset(out,0,sizeof(out)); \ + if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ + if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \ + if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ + (C##n && memcmp(out,C##n,sizeof(out)))) \ + ret++, printf ("encrypt test#%d failed.\n",n); \ + CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ + memset(out,0,sizeof(out)); \ + if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ + if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \ + if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ + (P##n && memcmp(out,P##n,sizeof(out)))) \ + ret++, printf ("decrypt test#%d failed.\n",n); \ + } while(0) + +int main() +{ + GCM128_CONTEXT ctx; + AES_KEY key; + int ret=0; + + TEST_CASE(1); + TEST_CASE(2); + TEST_CASE(3); + TEST_CASE(4); + TEST_CASE(5); + TEST_CASE(6); + TEST_CASE(7); + TEST_CASE(8); + TEST_CASE(9); + TEST_CASE(10); + TEST_CASE(11); + TEST_CASE(12); + TEST_CASE(13); + TEST_CASE(14); + TEST_CASE(15); + TEST_CASE(16); + TEST_CASE(17); + TEST_CASE(18); + TEST_CASE(19); + TEST_CASE(20); + +#ifdef OPENSSL_CPUID_OBJ + { + size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc(); + union { u64 u; u8 c[1024]; } buf; + int i; + + AES_set_encrypt_key(K1,sizeof(K1)*8,&key); + CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); + CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1)); + + CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf)); + start = OPENSSL_rdtsc(); + CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf)); + gcm_t = OPENSSL_rdtsc() - start; + + CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf), + &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres, + (block128_f)AES_encrypt); + start = OPENSSL_rdtsc(); + CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf), + &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres, + (block128_f)AES_encrypt); + ctr_t = OPENSSL_rdtsc() - start; + + printf("%.2f-%.2f=%.2f\n", + gcm_t/(double)sizeof(buf), + ctr_t/(double)sizeof(buf), + (gcm_t-ctr_t)/(double)sizeof(buf)); +#ifdef GHASH + { + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx.ghash; + + GHASH((&ctx),buf.c,sizeof(buf)); + start = OPENSSL_rdtsc(); + for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf)); + gcm_t = OPENSSL_rdtsc() - start; + printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i); + } +#endif + } +#endif + + return ret; +} +#endif diff --git a/app/openssl/crypto/modes/modes_lcl.h b/app/openssl/crypto/modes/modes_lcl.h new file mode 100644 index 00000000..9d83e128 --- /dev/null +++ b/app/openssl/crypto/modes/modes_lcl.h @@ -0,0 +1,128 @@ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use is governed by OpenSSL license. + * ==================================================================== + */ + +#include + + +#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +typedef __int64 i64; +typedef unsigned __int64 u64; +#define U64(C) C##UI64 +#elif defined(__arch64__) +typedef long i64; +typedef unsigned long u64; +#define U64(C) C##UL +#else +typedef long long i64; +typedef unsigned long long u64; +#define U64(C) C##ULL +#endif + +typedef unsigned int u32; +typedef unsigned char u8; + +#define STRICT_ALIGNMENT 1 +#if defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ + defined(__s390__) || defined(__s390x__) +# undef STRICT_ALIGNMENT +#endif + +#if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) +#if defined(__GNUC__) && __GNUC__>=2 +# if defined(__x86_64) || defined(__x86_64__) +# define BSWAP8(x) ({ u64 ret=(x); \ + asm ("bswapq %0" \ + : "+r"(ret)); ret; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm ("bswapl %0" \ + : "+r"(ret)); ret; }) +# elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY) +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ + asm ("bswapl %0; bswapl %1" \ + : "+r"(hi),"+r"(lo)); \ + (u64)hi<<32|lo; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm ("bswapl %0" \ + : "+r"(ret)); ret; }) +# elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT) +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ + asm ("rev %0,%0; rev %1,%1" \ + : "+r"(hi),"+r"(lo)); \ + (u64)hi<<32|lo; }) +# define BSWAP4(x) ({ u32 ret; \ + asm ("rev %0,%1" \ + : "=r"(ret) : "r"((u32)(x))); \ + ret; }) +# endif +#elif defined(_MSC_VER) +# if _MSC_VER>=1300 +# pragma intrinsic(_byteswap_uint64,_byteswap_ulong) +# define BSWAP8(x) _byteswap_uint64((u64)(x)) +# define BSWAP4(x) _byteswap_ulong((u32)(x)) +# elif defined(_M_IX86) + __inline u32 _bswap4(u32 val) { + _asm mov eax,val + _asm bswap eax + } +# define BSWAP4(x) _bswap4(x) +# endif +#endif +#endif + +#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT) +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +#else +#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) +#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) +#endif + +/* GCM definitions */ + +typedef struct { u64 hi,lo; } u128; + +#ifdef TABLE_BITS +#undef TABLE_BITS +#endif +/* + * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should + * never be set to 8 [or 1]. For further information see gcm128.c. + */ +#define TABLE_BITS 4 + +struct gcm128_context { + /* Following 6 names follow names in GCM specification */ + union { u64 u[2]; u32 d[4]; u8 c[16]; size_t t[16/sizeof(size_t)]; } + Yi,EKi,EK0,len,Xi,H; + /* Relative position of Xi, H and pre-computed Htable is used + * in some assembler modules, i.e. don't change the order! */ +#if TABLE_BITS==8 + u128 Htable[256]; +#else + u128 Htable[16]; + void (*gmult)(u64 Xi[2],const u128 Htable[16]); + void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +#endif + unsigned int mres, ares; + block128_f block; + void *key; +}; + +struct xts128_context { + void *key1, *key2; + block128_f block1,block2; +}; + +struct ccm128_context { + union { u64 u[2]; u8 c[16]; } nonce, cmac; + u64 blocks; + block128_f block; + void *key; +}; + diff --git a/app/openssl/crypto/modes/ofb128.c b/app/openssl/crypto/modes/ofb128.c index c732e2ec..01c01702 100644 --- a/app/openssl/crypto/modes/ofb128.c +++ b/app/openssl/crypto/modes/ofb128.c @@ -48,7 +48,8 @@ * */ -#include "modes.h" +#include +#include "modes_lcl.h" #include #ifndef MODES_DEBUG @@ -58,14 +59,6 @@ #endif #include -#define STRICT_ALIGNMENT -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT -#endif - /* The input and output encrypted as though 128bit ofb mode is being * used. The extra state information to record how much of the * 128bit block we have used is contained in *num; diff --git a/app/openssl/crypto/modes/xts128.c b/app/openssl/crypto/modes/xts128.c new file mode 100644 index 00000000..9cf27a25 --- /dev/null +++ b/app/openssl/crypto/modes/xts128.c @@ -0,0 +1,187 @@ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif +#include + +int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], + const unsigned char *inp, unsigned char *out, + size_t len, int enc) +{ + const union { long one; char little; } is_endian = {1}; + union { u64 u[2]; u32 d[4]; u8 c[16]; } tweak, scratch; + unsigned int i; + + if (len<16) return -1; + + memcpy(tweak.c, iv, 16); + + (*ctx->block2)(tweak.c,tweak.c,ctx->key2); + + if (!enc && (len%16)) len-=16; + + while (len>=16) { +#if defined(STRICT_ALIGNMENT) + memcpy(scratch.c,inp,16); + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; +#else + scratch.u[0] = ((u64*)inp)[0]^tweak.u[0]; + scratch.u[1] = ((u64*)inp)[1]^tweak.u[1]; +#endif + (*ctx->block1)(scratch.c,scratch.c,ctx->key1); +#if defined(STRICT_ALIGNMENT) + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy(out,scratch.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^=tweak.u[0]; + ((u64*)out)[1] = scratch.u[1]^=tweak.u[1]; +#endif + inp += 16; + out += 16; + len -= 16; + + if (len==0) return 0; + + if (is_endian.little) { + unsigned int carry,res; + + res = 0x87&(((int)tweak.d[3])>>31); + carry = (unsigned int)(tweak.u[0]>>63); + tweak.u[0] = (tweak.u[0]<<1)^res; + tweak.u[1] = (tweak.u[1]<<1)|carry; + } + else { + size_t c; + + for (c=0,i=0;i<16;++i) { + /*+ substitutes for |, because c is 1 bit */ + c += ((size_t)tweak.c[i])<<1; + tweak.c[i] = (u8)c; + c = c>>8; + } + tweak.c[0] ^= (u8)(0x87&(0-c)); + } + } + if (enc) { + for (i=0;iblock1)(scratch.c,scratch.c,ctx->key1); + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy(out-16,scratch.c,16); + } + else { + union { u64 u[2]; u8 c[16]; } tweak1; + + if (is_endian.little) { + unsigned int carry,res; + + res = 0x87&(((int)tweak.d[3])>>31); + carry = (unsigned int)(tweak.u[0]>>63); + tweak1.u[0] = (tweak.u[0]<<1)^res; + tweak1.u[1] = (tweak.u[1]<<1)|carry; + } + else { + size_t c; + + for (c=0,i=0;i<16;++i) { + /*+ substitutes for |, because c is 1 bit */ + c += ((size_t)tweak.c[i])<<1; + tweak1.c[i] = (u8)c; + c = c>>8; + } + tweak1.c[0] ^= (u8)(0x87&(0-c)); + } +#if defined(STRICT_ALIGNMENT) + memcpy(scratch.c,inp,16); + scratch.u[0] ^= tweak1.u[0]; + scratch.u[1] ^= tweak1.u[1]; +#else + scratch.u[0] = ((u64*)inp)[0]^tweak1.u[0]; + scratch.u[1] = ((u64*)inp)[1]^tweak1.u[1]; +#endif + (*ctx->block1)(scratch.c,scratch.c,ctx->key1); + scratch.u[0] ^= tweak1.u[0]; + scratch.u[1] ^= tweak1.u[1]; + + for (i=0;iblock1)(scratch.c,scratch.c,ctx->key1); +#if defined(STRICT_ALIGNMENT) + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy (out,scratch.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^tweak.u[0]; + ((u64*)out)[1] = scratch.u[1]^tweak.u[1]; +#endif + } + + return 0; +} diff --git a/app/openssl/crypto/o_init.c b/app/openssl/crypto/o_init.c new file mode 100644 index 00000000..db4cdc44 --- /dev/null +++ b/app/openssl/crypto/o_init.c @@ -0,0 +1,82 @@ +/* o_init.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include +#ifdef OPENSSL_FIPS +#include +#include +#endif + +/* Perform any essential OpenSSL initialization operations. + * Currently only sets FIPS callbacks + */ + +void OPENSSL_init(void) + { + static int done = 0; + if (done) + return; + done = 1; +#ifdef OPENSSL_FIPS + FIPS_set_locking_callbacks(CRYPTO_lock, CRYPTO_add_lock); + FIPS_set_error_callbacks(ERR_put_error, ERR_add_error_vdata); + FIPS_set_malloc_callbacks(CRYPTO_malloc, CRYPTO_free); + RAND_init_fips(); +#endif +#if 0 + fprintf(stderr, "Called OPENSSL_init\n"); +#endif + } + diff --git a/app/openssl/crypto/objects/o_names.c b/app/openssl/crypto/objects/o_names.c index 84380a96..4a548c2e 100644 --- a/app/openssl/crypto/objects/o_names.c +++ b/app/openssl/crypto/objects/o_names.c @@ -73,7 +73,7 @@ int OBJ_NAME_new_index(unsigned long (*hash_func)(const char *), name_funcs_stack=sk_NAME_FUNCS_new_null(); MemCheck_on(); } - if ((name_funcs_stack == NULL)) + if (name_funcs_stack == NULL) { /* ERROR */ return(0); diff --git a/app/openssl/crypto/objects/obj_dat.h b/app/openssl/crypto/objects/obj_dat.h index 6449be60..d404ad07 100644 --- a/app/openssl/crypto/objects/obj_dat.h +++ b/app/openssl/crypto/objects/obj_dat.h @@ -62,12 +62,12 @@ * [including the GNU Public Licence.] */ -#define NUM_NID 893 -#define NUM_SN 886 -#define NUM_LN 886 -#define NUM_OBJ 840 +#define NUM_NID 920 +#define NUM_SN 913 +#define NUM_LN 913 +#define NUM_OBJ 857 -static const unsigned char lvalues[5824]={ +static const unsigned char lvalues[5980]={ 0x00, /* [ 0] OBJ_undef */ 0x2A,0x86,0x48,0x86,0xF7,0x0D, /* [ 1] OBJ_rsadsi */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01, /* [ 7] OBJ_pkcs */ @@ -908,6 +908,23 @@ static const unsigned char lvalues[5824]={ 0x55,0x04,0x34, /* [5814] OBJ_supportedAlgorithms */ 0x55,0x04,0x35, /* [5817] OBJ_deltaRevocationList */ 0x55,0x04,0x36, /* [5820] OBJ_dmdName */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x09,0x10,0x03,0x09,/* [5823] OBJ_id_alg_PWRI_KEK */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x06,/* [5834] OBJ_aes_128_gcm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x07,/* [5843] OBJ_aes_128_ccm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x08,/* [5852] OBJ_id_aes128_wrap_pad */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x1A,/* [5861] OBJ_aes_192_gcm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x1B,/* [5870] OBJ_aes_192_ccm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x1C,/* [5879] OBJ_id_aes192_wrap_pad */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x2E,/* [5888] OBJ_aes_256_gcm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x2F,/* [5897] OBJ_aes_256_ccm */ +0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x01,0x30,/* [5906] OBJ_id_aes256_wrap_pad */ +0x2A,0x83,0x08,0x8C,0x9A,0x4B,0x3D,0x01,0x01,0x03,0x02,/* [5915] OBJ_id_camellia128_wrap */ +0x2A,0x83,0x08,0x8C,0x9A,0x4B,0x3D,0x01,0x01,0x03,0x03,/* [5926] OBJ_id_camellia192_wrap */ +0x2A,0x83,0x08,0x8C,0x9A,0x4B,0x3D,0x01,0x01,0x03,0x04,/* [5937] OBJ_id_camellia256_wrap */ +0x55,0x1D,0x25,0x00, /* [5948] OBJ_anyExtendedKeyUsage */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x08,/* [5952] OBJ_mgf1 */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x0A,/* [5961] OBJ_rsassaPss */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x07,/* [5970] OBJ_rsaesOaep */ }; static const ASN1_OBJECT nid_objs[NUM_NID]={ @@ -2351,28 +2368,74 @@ static const ASN1_OBJECT nid_objs[NUM_NID]={ {"deltaRevocationList","deltaRevocationList",NID_deltaRevocationList, 3,&(lvalues[5817]),0}, {"dmdName","dmdName",NID_dmdName,3,&(lvalues[5820]),0}, +{"id-alg-PWRI-KEK","id-alg-PWRI-KEK",NID_id_alg_PWRI_KEK,11, + &(lvalues[5823]),0}, +{"CMAC","cmac",NID_cmac,0,NULL,0}, +{"id-aes128-GCM","aes-128-gcm",NID_aes_128_gcm,9,&(lvalues[5834]),0}, +{"id-aes128-CCM","aes-128-ccm",NID_aes_128_ccm,9,&(lvalues[5843]),0}, +{"id-aes128-wrap-pad","id-aes128-wrap-pad",NID_id_aes128_wrap_pad,9, + &(lvalues[5852]),0}, +{"id-aes192-GCM","aes-192-gcm",NID_aes_192_gcm,9,&(lvalues[5861]),0}, +{"id-aes192-CCM","aes-192-ccm",NID_aes_192_ccm,9,&(lvalues[5870]),0}, +{"id-aes192-wrap-pad","id-aes192-wrap-pad",NID_id_aes192_wrap_pad,9, + &(lvalues[5879]),0}, +{"id-aes256-GCM","aes-256-gcm",NID_aes_256_gcm,9,&(lvalues[5888]),0}, +{"id-aes256-CCM","aes-256-ccm",NID_aes_256_ccm,9,&(lvalues[5897]),0}, +{"id-aes256-wrap-pad","id-aes256-wrap-pad",NID_id_aes256_wrap_pad,9, + &(lvalues[5906]),0}, +{"AES-128-CTR","aes-128-ctr",NID_aes_128_ctr,0,NULL,0}, +{"AES-192-CTR","aes-192-ctr",NID_aes_192_ctr,0,NULL,0}, +{"AES-256-CTR","aes-256-ctr",NID_aes_256_ctr,0,NULL,0}, +{"id-camellia128-wrap","id-camellia128-wrap",NID_id_camellia128_wrap, + 11,&(lvalues[5915]),0}, +{"id-camellia192-wrap","id-camellia192-wrap",NID_id_camellia192_wrap, + 11,&(lvalues[5926]),0}, +{"id-camellia256-wrap","id-camellia256-wrap",NID_id_camellia256_wrap, + 11,&(lvalues[5937]),0}, +{"anyExtendedKeyUsage","Any Extended Key Usage", + NID_anyExtendedKeyUsage,4,&(lvalues[5948]),0}, +{"MGF1","mgf1",NID_mgf1,9,&(lvalues[5952]),0}, +{"RSASSA-PSS","rsassaPss",NID_rsassaPss,9,&(lvalues[5961]),0}, +{"AES-128-XTS","aes-128-xts",NID_aes_128_xts,0,NULL,0}, +{"AES-256-XTS","aes-256-xts",NID_aes_256_xts,0,NULL,0}, +{"RC4-HMAC-MD5","rc4-hmac-md5",NID_rc4_hmac_md5,0,NULL,0}, +{"AES-128-CBC-HMAC-SHA1","aes-128-cbc-hmac-sha1", + NID_aes_128_cbc_hmac_sha1,0,NULL,0}, +{"AES-192-CBC-HMAC-SHA1","aes-192-cbc-hmac-sha1", + NID_aes_192_cbc_hmac_sha1,0,NULL,0}, +{"AES-256-CBC-HMAC-SHA1","aes-256-cbc-hmac-sha1", + NID_aes_256_cbc_hmac_sha1,0,NULL,0}, +{"RSAES-OAEP","rsaesOaep",NID_rsaesOaep,9,&(lvalues[5970]),0}, }; static const unsigned int sn_objs[NUM_SN]={ 364, /* "AD_DVCS" */ 419, /* "AES-128-CBC" */ +916, /* "AES-128-CBC-HMAC-SHA1" */ 421, /* "AES-128-CFB" */ 650, /* "AES-128-CFB1" */ 653, /* "AES-128-CFB8" */ +904, /* "AES-128-CTR" */ 418, /* "AES-128-ECB" */ 420, /* "AES-128-OFB" */ +913, /* "AES-128-XTS" */ 423, /* "AES-192-CBC" */ +917, /* "AES-192-CBC-HMAC-SHA1" */ 425, /* "AES-192-CFB" */ 651, /* "AES-192-CFB1" */ 654, /* "AES-192-CFB8" */ +905, /* "AES-192-CTR" */ 422, /* "AES-192-ECB" */ 424, /* "AES-192-OFB" */ 427, /* "AES-256-CBC" */ +918, /* "AES-256-CBC-HMAC-SHA1" */ 429, /* "AES-256-CFB" */ 652, /* "AES-256-CFB1" */ 655, /* "AES-256-CFB8" */ +906, /* "AES-256-CTR" */ 426, /* "AES-256-ECB" */ 428, /* "AES-256-OFB" */ +914, /* "AES-256-XTS" */ 91, /* "BF-CBC" */ 93, /* "BF-CFB" */ 92, /* "BF-ECB" */ @@ -2400,6 +2463,7 @@ static const unsigned int sn_objs[NUM_SN]={ 110, /* "CAST5-CFB" */ 109, /* "CAST5-ECB" */ 111, /* "CAST5-OFB" */ +894, /* "CMAC" */ 13, /* "CN" */ 141, /* "CRLReason" */ 417, /* "CSPName" */ @@ -2451,6 +2515,7 @@ static const unsigned int sn_objs[NUM_SN]={ 4, /* "MD5" */ 114, /* "MD5-SHA1" */ 95, /* "MDC2" */ +911, /* "MGF1" */ 388, /* "Mail" */ 393, /* "NULL" */ 404, /* "NULL" */ @@ -2487,6 +2552,7 @@ static const unsigned int sn_objs[NUM_SN]={ 40, /* "RC2-OFB" */ 5, /* "RC4" */ 97, /* "RC4-40" */ +915, /* "RC4-HMAC-MD5" */ 120, /* "RC5-CBC" */ 122, /* "RC5-CFB" */ 121, /* "RC5-ECB" */ @@ -2507,6 +2573,8 @@ static const unsigned int sn_objs[NUM_SN]={ 668, /* "RSA-SHA256" */ 669, /* "RSA-SHA384" */ 670, /* "RSA-SHA512" */ +919, /* "RSAES-OAEP" */ +912, /* "RSASSA-PSS" */ 777, /* "SEED-CBC" */ 779, /* "SEED-CFB" */ 776, /* "SEED-ECB" */ @@ -2540,6 +2608,7 @@ static const unsigned int sn_objs[NUM_SN]={ 363, /* "ad_timestamping" */ 376, /* "algorithm" */ 405, /* "ansi-X9-62" */ +910, /* "anyExtendedKeyUsage" */ 746, /* "anyPolicy" */ 370, /* "archiveCutoff" */ 484, /* "associatedDomain" */ @@ -2716,14 +2785,27 @@ static const unsigned int sn_objs[NUM_SN]={ 357, /* "id-aca-group" */ 358, /* "id-aca-role" */ 176, /* "id-ad" */ +896, /* "id-aes128-CCM" */ +895, /* "id-aes128-GCM" */ 788, /* "id-aes128-wrap" */ +897, /* "id-aes128-wrap-pad" */ +899, /* "id-aes192-CCM" */ +898, /* "id-aes192-GCM" */ 789, /* "id-aes192-wrap" */ +900, /* "id-aes192-wrap-pad" */ +902, /* "id-aes256-CCM" */ +901, /* "id-aes256-GCM" */ 790, /* "id-aes256-wrap" */ +903, /* "id-aes256-wrap-pad" */ 262, /* "id-alg" */ +893, /* "id-alg-PWRI-KEK" */ 323, /* "id-alg-des40" */ 326, /* "id-alg-dh-pop" */ 325, /* "id-alg-dh-sig-hmac-sha1" */ 324, /* "id-alg-noSignature" */ +907, /* "id-camellia128-wrap" */ +908, /* "id-camellia192-wrap" */ +909, /* "id-camellia256-wrap" */ 268, /* "id-cct" */ 361, /* "id-cct-PKIData" */ 362, /* "id-cct-PKIResponse" */ @@ -3246,6 +3328,7 @@ static const unsigned int ln_objs[NUM_LN]={ 363, /* "AD Time Stamping" */ 405, /* "ANSI X9.62" */ 368, /* "Acceptable OCSP Responses" */ +910, /* "Any Extended Key Usage" */ 664, /* "Any language" */ 177, /* "Authority Information Access" */ 365, /* "Basic OCSP Response" */ @@ -3386,23 +3469,37 @@ static const unsigned int ln_objs[NUM_LN]={ 364, /* "ad dvcs" */ 606, /* "additional verification" */ 419, /* "aes-128-cbc" */ +916, /* "aes-128-cbc-hmac-sha1" */ +896, /* "aes-128-ccm" */ 421, /* "aes-128-cfb" */ 650, /* "aes-128-cfb1" */ 653, /* "aes-128-cfb8" */ +904, /* "aes-128-ctr" */ 418, /* "aes-128-ecb" */ +895, /* "aes-128-gcm" */ 420, /* "aes-128-ofb" */ +913, /* "aes-128-xts" */ 423, /* "aes-192-cbc" */ +917, /* "aes-192-cbc-hmac-sha1" */ +899, /* "aes-192-ccm" */ 425, /* "aes-192-cfb" */ 651, /* "aes-192-cfb1" */ 654, /* "aes-192-cfb8" */ +905, /* "aes-192-ctr" */ 422, /* "aes-192-ecb" */ +898, /* "aes-192-gcm" */ 424, /* "aes-192-ofb" */ 427, /* "aes-256-cbc" */ +918, /* "aes-256-cbc-hmac-sha1" */ +902, /* "aes-256-ccm" */ 429, /* "aes-256-cfb" */ 652, /* "aes-256-cfb1" */ 655, /* "aes-256-cfb8" */ +906, /* "aes-256-ctr" */ 426, /* "aes-256-ecb" */ +901, /* "aes-256-gcm" */ 428, /* "aes-256-ofb" */ +914, /* "aes-256-xts" */ 376, /* "algorithm" */ 484, /* "associatedDomain" */ 485, /* "associatedName" */ @@ -3467,6 +3564,7 @@ static const unsigned int ln_objs[NUM_LN]={ 407, /* "characteristic-two-field" */ 395, /* "clearance" */ 633, /* "cleartext track 2" */ +894, /* "cmac" */ 13, /* "commonName" */ 513, /* "content types" */ 50, /* "contentType" */ @@ -3602,13 +3700,20 @@ static const unsigned int ln_objs[NUM_LN]={ 358, /* "id-aca-role" */ 176, /* "id-ad" */ 788, /* "id-aes128-wrap" */ +897, /* "id-aes128-wrap-pad" */ 789, /* "id-aes192-wrap" */ +900, /* "id-aes192-wrap-pad" */ 790, /* "id-aes256-wrap" */ +903, /* "id-aes256-wrap-pad" */ 262, /* "id-alg" */ +893, /* "id-alg-PWRI-KEK" */ 323, /* "id-alg-des40" */ 326, /* "id-alg-dh-pop" */ 325, /* "id-alg-dh-sig-hmac-sha1" */ 324, /* "id-alg-noSignature" */ +907, /* "id-camellia128-wrap" */ +908, /* "id-camellia192-wrap" */ +909, /* "id-camellia256-wrap" */ 268, /* "id-cct" */ 361, /* "id-cct-PKIData" */ 362, /* "id-cct-PKIResponse" */ @@ -3806,6 +3911,7 @@ static const unsigned int ln_objs[NUM_LN]={ 602, /* "merchant initiated auth" */ 514, /* "message extensions" */ 51, /* "messageDigest" */ +911, /* "mgf1" */ 506, /* "mime-mhs-bodies" */ 505, /* "mime-mhs-headings" */ 488, /* "mobileTelephoneNumber" */ @@ -3889,6 +3995,7 @@ static const unsigned int ln_objs[NUM_LN]={ 40, /* "rc2-ofb" */ 5, /* "rc4" */ 97, /* "rc4-40" */ +915, /* "rc4-hmac-md5" */ 120, /* "rc5-cbc" */ 122, /* "rc5-cfb" */ 121, /* "rc5-ecb" */ @@ -3905,6 +4012,8 @@ static const unsigned int ln_objs[NUM_LN]={ 6, /* "rsaEncryption" */ 644, /* "rsaOAEPEncryptionSET" */ 377, /* "rsaSignature" */ +919, /* "rsaesOaep" */ +912, /* "rsassaPss" */ 124, /* "run length compression" */ 482, /* "sOARecord" */ 155, /* "safeContentsBag" */ @@ -4254,6 +4363,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 96, /* OBJ_mdc2WithRSA 2 5 8 3 100 */ 95, /* OBJ_mdc2 2 5 8 3 101 */ 746, /* OBJ_any_policy 2 5 29 32 0 */ +910, /* OBJ_anyExtendedKeyUsage 2 5 29 37 0 */ 519, /* OBJ_setct_PANData 2 23 42 0 0 */ 520, /* OBJ_setct_PANToken 2 23 42 0 1 */ 521, /* OBJ_setct_PANOnly 2 23 42 0 2 */ @@ -4720,6 +4830,9 @@ static const unsigned int obj_objs[NUM_OBJ]={ 8, /* OBJ_md5WithRSAEncryption 1 2 840 113549 1 1 4 */ 65, /* OBJ_sha1WithRSAEncryption 1 2 840 113549 1 1 5 */ 644, /* OBJ_rsaOAEPEncryptionSET 1 2 840 113549 1 1 6 */ +919, /* OBJ_rsaesOaep 1 2 840 113549 1 1 7 */ +911, /* OBJ_mgf1 1 2 840 113549 1 1 8 */ +912, /* OBJ_rsassaPss 1 2 840 113549 1 1 10 */ 668, /* OBJ_sha256WithRSAEncryption 1 2 840 113549 1 1 11 */ 669, /* OBJ_sha384WithRSAEncryption 1 2 840 113549 1 1 12 */ 670, /* OBJ_sha512WithRSAEncryption 1 2 840 113549 1 1 13 */ @@ -4785,16 +4898,25 @@ static const unsigned int obj_objs[NUM_OBJ]={ 420, /* OBJ_aes_128_ofb128 2 16 840 1 101 3 4 1 3 */ 421, /* OBJ_aes_128_cfb128 2 16 840 1 101 3 4 1 4 */ 788, /* OBJ_id_aes128_wrap 2 16 840 1 101 3 4 1 5 */ +895, /* OBJ_aes_128_gcm 2 16 840 1 101 3 4 1 6 */ +896, /* OBJ_aes_128_ccm 2 16 840 1 101 3 4 1 7 */ +897, /* OBJ_id_aes128_wrap_pad 2 16 840 1 101 3 4 1 8 */ 422, /* OBJ_aes_192_ecb 2 16 840 1 101 3 4 1 21 */ 423, /* OBJ_aes_192_cbc 2 16 840 1 101 3 4 1 22 */ 424, /* OBJ_aes_192_ofb128 2 16 840 1 101 3 4 1 23 */ 425, /* OBJ_aes_192_cfb128 2 16 840 1 101 3 4 1 24 */ 789, /* OBJ_id_aes192_wrap 2 16 840 1 101 3 4 1 25 */ +898, /* OBJ_aes_192_gcm 2 16 840 1 101 3 4 1 26 */ +899, /* OBJ_aes_192_ccm 2 16 840 1 101 3 4 1 27 */ +900, /* OBJ_id_aes192_wrap_pad 2 16 840 1 101 3 4 1 28 */ 426, /* OBJ_aes_256_ecb 2 16 840 1 101 3 4 1 41 */ 427, /* OBJ_aes_256_cbc 2 16 840 1 101 3 4 1 42 */ 428, /* OBJ_aes_256_ofb128 2 16 840 1 101 3 4 1 43 */ 429, /* OBJ_aes_256_cfb128 2 16 840 1 101 3 4 1 44 */ 790, /* OBJ_id_aes256_wrap 2 16 840 1 101 3 4 1 45 */ +901, /* OBJ_aes_256_gcm 2 16 840 1 101 3 4 1 46 */ +902, /* OBJ_aes_256_ccm 2 16 840 1 101 3 4 1 47 */ +903, /* OBJ_id_aes256_wrap_pad 2 16 840 1 101 3 4 1 48 */ 672, /* OBJ_sha256 2 16 840 1 101 3 4 2 1 */ 673, /* OBJ_sha384 2 16 840 1 101 3 4 2 2 */ 674, /* OBJ_sha512 2 16 840 1 101 3 4 2 3 */ @@ -4901,6 +5023,9 @@ static const unsigned int obj_objs[NUM_OBJ]={ 751, /* OBJ_camellia_128_cbc 1 2 392 200011 61 1 1 1 2 */ 752, /* OBJ_camellia_192_cbc 1 2 392 200011 61 1 1 1 3 */ 753, /* OBJ_camellia_256_cbc 1 2 392 200011 61 1 1 1 4 */ +907, /* OBJ_id_camellia128_wrap 1 2 392 200011 61 1 1 3 2 */ +908, /* OBJ_id_camellia192_wrap 1 2 392 200011 61 1 1 3 3 */ +909, /* OBJ_id_camellia256_wrap 1 2 392 200011 61 1 1 3 4 */ 196, /* OBJ_id_smime_mod_cms 1 2 840 113549 1 9 16 0 1 */ 197, /* OBJ_id_smime_mod_ess 1 2 840 113549 1 9 16 0 2 */ 198, /* OBJ_id_smime_mod_oid 1 2 840 113549 1 9 16 0 3 */ @@ -4956,6 +5081,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 246, /* OBJ_id_smime_alg_CMS3DESwrap 1 2 840 113549 1 9 16 3 6 */ 247, /* OBJ_id_smime_alg_CMSRC2wrap 1 2 840 113549 1 9 16 3 7 */ 125, /* OBJ_zlib_compression 1 2 840 113549 1 9 16 3 8 */ +893, /* OBJ_id_alg_PWRI_KEK 1 2 840 113549 1 9 16 3 9 */ 248, /* OBJ_id_smime_cd_ldap 1 2 840 113549 1 9 16 4 1 */ 249, /* OBJ_id_smime_spq_ets_sqt_uri 1 2 840 113549 1 9 16 5 1 */ 250, /* OBJ_id_smime_spq_ets_sqt_unotice 1 2 840 113549 1 9 16 5 2 */ diff --git a/app/openssl/crypto/objects/obj_mac.h b/app/openssl/crypto/objects/obj_mac.h index 282f11a8..b5ea7cda 100644 --- a/app/openssl/crypto/objects/obj_mac.h +++ b/app/openssl/crypto/objects/obj_mac.h @@ -580,6 +580,21 @@ #define NID_sha1WithRSAEncryption 65 #define OBJ_sha1WithRSAEncryption OBJ_pkcs1,5L +#define SN_rsaesOaep "RSAES-OAEP" +#define LN_rsaesOaep "rsaesOaep" +#define NID_rsaesOaep 919 +#define OBJ_rsaesOaep OBJ_pkcs1,7L + +#define SN_mgf1 "MGF1" +#define LN_mgf1 "mgf1" +#define NID_mgf1 911 +#define OBJ_mgf1 OBJ_pkcs1,8L + +#define SN_rsassaPss "RSASSA-PSS" +#define LN_rsassaPss "rsassaPss" +#define NID_rsassaPss 912 +#define OBJ_rsassaPss OBJ_pkcs1,10L + #define SN_sha256WithRSAEncryption "RSA-SHA256" #define LN_sha256WithRSAEncryption "sha256WithRSAEncryption" #define NID_sha256WithRSAEncryption 668 @@ -981,6 +996,10 @@ #define NID_id_smime_alg_CMSRC2wrap 247 #define OBJ_id_smime_alg_CMSRC2wrap OBJ_id_smime_alg,7L +#define SN_id_alg_PWRI_KEK "id-alg-PWRI-KEK" +#define NID_id_alg_PWRI_KEK 893 +#define OBJ_id_alg_PWRI_KEK OBJ_id_smime_alg,9L + #define SN_id_smime_cd_ldap "id-smime-cd-ldap" #define NID_id_smime_cd_ldap 248 #define OBJ_id_smime_cd_ldap OBJ_id_smime_cd,1L @@ -2399,6 +2418,11 @@ #define NID_no_rev_avail 403 #define OBJ_no_rev_avail OBJ_id_ce,56L +#define SN_anyExtendedKeyUsage "anyExtendedKeyUsage" +#define LN_anyExtendedKeyUsage "Any Extended Key Usage" +#define NID_anyExtendedKeyUsage 910 +#define OBJ_anyExtendedKeyUsage OBJ_ext_key_usage,0L + #define SN_netscape "Netscape" #define LN_netscape "Netscape Communications Corp." #define NID_netscape 57 @@ -2586,6 +2610,24 @@ #define NID_aes_128_cfb128 421 #define OBJ_aes_128_cfb128 OBJ_aes,4L +#define SN_id_aes128_wrap "id-aes128-wrap" +#define NID_id_aes128_wrap 788 +#define OBJ_id_aes128_wrap OBJ_aes,5L + +#define SN_aes_128_gcm "id-aes128-GCM" +#define LN_aes_128_gcm "aes-128-gcm" +#define NID_aes_128_gcm 895 +#define OBJ_aes_128_gcm OBJ_aes,6L + +#define SN_aes_128_ccm "id-aes128-CCM" +#define LN_aes_128_ccm "aes-128-ccm" +#define NID_aes_128_ccm 896 +#define OBJ_aes_128_ccm OBJ_aes,7L + +#define SN_id_aes128_wrap_pad "id-aes128-wrap-pad" +#define NID_id_aes128_wrap_pad 897 +#define OBJ_id_aes128_wrap_pad OBJ_aes,8L + #define SN_aes_192_ecb "AES-192-ECB" #define LN_aes_192_ecb "aes-192-ecb" #define NID_aes_192_ecb 422 @@ -2606,6 +2648,24 @@ #define NID_aes_192_cfb128 425 #define OBJ_aes_192_cfb128 OBJ_aes,24L +#define SN_id_aes192_wrap "id-aes192-wrap" +#define NID_id_aes192_wrap 789 +#define OBJ_id_aes192_wrap OBJ_aes,25L + +#define SN_aes_192_gcm "id-aes192-GCM" +#define LN_aes_192_gcm "aes-192-gcm" +#define NID_aes_192_gcm 898 +#define OBJ_aes_192_gcm OBJ_aes,26L + +#define SN_aes_192_ccm "id-aes192-CCM" +#define LN_aes_192_ccm "aes-192-ccm" +#define NID_aes_192_ccm 899 +#define OBJ_aes_192_ccm OBJ_aes,27L + +#define SN_id_aes192_wrap_pad "id-aes192-wrap-pad" +#define NID_id_aes192_wrap_pad 900 +#define OBJ_id_aes192_wrap_pad OBJ_aes,28L + #define SN_aes_256_ecb "AES-256-ECB" #define LN_aes_256_ecb "aes-256-ecb" #define NID_aes_256_ecb 426 @@ -2626,6 +2686,24 @@ #define NID_aes_256_cfb128 429 #define OBJ_aes_256_cfb128 OBJ_aes,44L +#define SN_id_aes256_wrap "id-aes256-wrap" +#define NID_id_aes256_wrap 790 +#define OBJ_id_aes256_wrap OBJ_aes,45L + +#define SN_aes_256_gcm "id-aes256-GCM" +#define LN_aes_256_gcm "aes-256-gcm" +#define NID_aes_256_gcm 901 +#define OBJ_aes_256_gcm OBJ_aes,46L + +#define SN_aes_256_ccm "id-aes256-CCM" +#define LN_aes_256_ccm "aes-256-ccm" +#define NID_aes_256_ccm 902 +#define OBJ_aes_256_ccm OBJ_aes,47L + +#define SN_id_aes256_wrap_pad "id-aes256-wrap-pad" +#define NID_id_aes256_wrap_pad 903 +#define OBJ_id_aes256_wrap_pad OBJ_aes,48L + #define SN_aes_128_cfb1 "AES-128-CFB1" #define LN_aes_128_cfb1 "aes-128-cfb1" #define NID_aes_128_cfb1 650 @@ -2650,6 +2728,26 @@ #define LN_aes_256_cfb8 "aes-256-cfb8" #define NID_aes_256_cfb8 655 +#define SN_aes_128_ctr "AES-128-CTR" +#define LN_aes_128_ctr "aes-128-ctr" +#define NID_aes_128_ctr 904 + +#define SN_aes_192_ctr "AES-192-CTR" +#define LN_aes_192_ctr "aes-192-ctr" +#define NID_aes_192_ctr 905 + +#define SN_aes_256_ctr "AES-256-CTR" +#define LN_aes_256_ctr "aes-256-ctr" +#define NID_aes_256_ctr 906 + +#define SN_aes_128_xts "AES-128-XTS" +#define LN_aes_128_xts "aes-128-xts" +#define NID_aes_128_xts 913 + +#define SN_aes_256_xts "AES-256-XTS" +#define LN_aes_256_xts "aes-256-xts" +#define NID_aes_256_xts 914 + #define SN_des_cfb1 "DES-CFB1" #define LN_des_cfb1 "des-cfb1" #define NID_des_cfb1 656 @@ -2666,18 +2764,6 @@ #define LN_des_ede3_cfb8 "des-ede3-cfb8" #define NID_des_ede3_cfb8 659 -#define SN_id_aes128_wrap "id-aes128-wrap" -#define NID_id_aes128_wrap 788 -#define OBJ_id_aes128_wrap OBJ_aes,5L - -#define SN_id_aes192_wrap "id-aes192-wrap" -#define NID_id_aes192_wrap 789 -#define OBJ_id_aes192_wrap OBJ_aes,25L - -#define SN_id_aes256_wrap "id-aes256-wrap" -#define NID_id_aes256_wrap 790 -#define OBJ_id_aes256_wrap OBJ_aes,45L - #define OBJ_nist_hashalgs OBJ_nistAlgorithms,2L #define SN_sha256 "SHA256" @@ -3810,6 +3896,18 @@ #define NID_camellia_256_cbc 753 #define OBJ_camellia_256_cbc 1L,2L,392L,200011L,61L,1L,1L,1L,4L +#define SN_id_camellia128_wrap "id-camellia128-wrap" +#define NID_id_camellia128_wrap 907 +#define OBJ_id_camellia128_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,2L + +#define SN_id_camellia192_wrap "id-camellia192-wrap" +#define NID_id_camellia192_wrap 908 +#define OBJ_id_camellia192_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,3L + +#define SN_id_camellia256_wrap "id-camellia256-wrap" +#define NID_id_camellia256_wrap 909 +#define OBJ_id_camellia256_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,4L + #define OBJ_ntt_ds 0L,3L,4401L,5L #define OBJ_camellia OBJ_ntt_ds,3L,1L,9L @@ -3912,3 +4010,23 @@ #define LN_hmac "hmac" #define NID_hmac 855 +#define SN_cmac "CMAC" +#define LN_cmac "cmac" +#define NID_cmac 894 + +#define SN_rc4_hmac_md5 "RC4-HMAC-MD5" +#define LN_rc4_hmac_md5 "rc4-hmac-md5" +#define NID_rc4_hmac_md5 915 + +#define SN_aes_128_cbc_hmac_sha1 "AES-128-CBC-HMAC-SHA1" +#define LN_aes_128_cbc_hmac_sha1 "aes-128-cbc-hmac-sha1" +#define NID_aes_128_cbc_hmac_sha1 916 + +#define SN_aes_192_cbc_hmac_sha1 "AES-192-CBC-HMAC-SHA1" +#define LN_aes_192_cbc_hmac_sha1 "aes-192-cbc-hmac-sha1" +#define NID_aes_192_cbc_hmac_sha1 917 + +#define SN_aes_256_cbc_hmac_sha1 "AES-256-CBC-HMAC-SHA1" +#define LN_aes_256_cbc_hmac_sha1 "aes-256-cbc-hmac-sha1" +#define NID_aes_256_cbc_hmac_sha1 918 + diff --git a/app/openssl/crypto/objects/obj_mac.num b/app/openssl/crypto/objects/obj_mac.num index 8c50aac2..1d0a7c80 100644 --- a/app/openssl/crypto/objects/obj_mac.num +++ b/app/openssl/crypto/objects/obj_mac.num @@ -890,3 +890,30 @@ houseIdentifier 889 supportedAlgorithms 890 deltaRevocationList 891 dmdName 892 +id_alg_PWRI_KEK 893 +cmac 894 +aes_128_gcm 895 +aes_128_ccm 896 +id_aes128_wrap_pad 897 +aes_192_gcm 898 +aes_192_ccm 899 +id_aes192_wrap_pad 900 +aes_256_gcm 901 +aes_256_ccm 902 +id_aes256_wrap_pad 903 +aes_128_ctr 904 +aes_192_ctr 905 +aes_256_ctr 906 +id_camellia128_wrap 907 +id_camellia192_wrap 908 +id_camellia256_wrap 909 +anyExtendedKeyUsage 910 +mgf1 911 +rsassaPss 912 +aes_128_xts 913 +aes_256_xts 914 +rc4_hmac_md5 915 +aes_128_cbc_hmac_sha1 916 +aes_192_cbc_hmac_sha1 917 +aes_256_cbc_hmac_sha1 918 +rsaesOaep 919 diff --git a/app/openssl/crypto/objects/obj_xref.c b/app/openssl/crypto/objects/obj_xref.c index 152eca5c..9f744bce 100644 --- a/app/openssl/crypto/objects/obj_xref.c +++ b/app/openssl/crypto/objects/obj_xref.c @@ -110,8 +110,10 @@ int OBJ_find_sigid_algs(int signid, int *pdig_nid, int *ppkey_nid) #endif if (rv == NULL) return 0; - *pdig_nid = rv->hash_id; - *ppkey_nid = rv->pkey_id; + if (pdig_nid) + *pdig_nid = rv->hash_id; + if (ppkey_nid) + *ppkey_nid = rv->pkey_id; return 1; } @@ -144,7 +146,8 @@ int OBJ_find_sigid_by_algs(int *psignid, int dig_nid, int pkey_nid) #endif if (rv == NULL) return 0; - *psignid = (*rv)->sign_id; + if (psignid) + *psignid = (*rv)->sign_id; return 1; } diff --git a/app/openssl/crypto/objects/obj_xref.h b/app/openssl/crypto/objects/obj_xref.h index d5b9b8e1..e23938c2 100644 --- a/app/openssl/crypto/objects/obj_xref.h +++ b/app/openssl/crypto/objects/obj_xref.h @@ -38,10 +38,12 @@ static const nid_triple sigoid_srt[] = {NID_id_GostR3411_94_with_GostR3410_94, NID_id_GostR3411_94, NID_id_GostR3410_94}, {NID_id_GostR3411_94_with_GostR3410_94_cc, NID_id_GostR3411_94, NID_id_GostR3410_94_cc}, {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94, NID_id_GostR3410_2001_cc}, + {NID_rsassaPss, NID_undef, NID_rsaEncryption}, }; static const nid_triple * const sigoid_srt_xref[] = { + &sigoid_srt[29], &sigoid_srt[17], &sigoid_srt[18], &sigoid_srt[0], diff --git a/app/openssl/crypto/objects/obj_xref.txt b/app/openssl/crypto/objects/obj_xref.txt index e45b3d34..cb917182 100644 --- a/app/openssl/crypto/objects/obj_xref.txt +++ b/app/openssl/crypto/objects/obj_xref.txt @@ -13,6 +13,10 @@ sha512WithRSAEncryption sha512 rsaEncryption sha224WithRSAEncryption sha224 rsaEncryption mdc2WithRSA mdc2 rsaEncryption ripemd160WithRSA ripemd160 rsaEncryption +# For PSS the digest algorithm can vary and depends on the included +# AlgorithmIdentifier. The digest "undef" indicates the public key +# method should handle this explicitly. +rsassaPss undef rsaEncryption # Alternative deprecated OIDs. By using the older "rsa" OID this # type will be recognized by not normally used. diff --git a/app/openssl/crypto/objects/objects.txt b/app/openssl/crypto/objects/objects.txt index e61fe60c..d3bfad72 100644 --- a/app/openssl/crypto/objects/objects.txt +++ b/app/openssl/crypto/objects/objects.txt @@ -166,6 +166,10 @@ pkcs1 3 : RSA-MD4 : md4WithRSAEncryption pkcs1 4 : RSA-MD5 : md5WithRSAEncryption pkcs1 5 : RSA-SHA1 : sha1WithRSAEncryption # According to PKCS #1 version 2.1 +pkcs1 7 : RSAES-OAEP : rsaesOaep +pkcs1 8 : MGF1 : mgf1 +pkcs1 10 : RSASSA-PSS : rsassaPss + pkcs1 11 : RSA-SHA256 : sha256WithRSAEncryption pkcs1 12 : RSA-SHA384 : sha384WithRSAEncryption pkcs1 13 : RSA-SHA512 : sha512WithRSAEncryption @@ -299,6 +303,7 @@ id-smime-alg 4 : id-smime-alg-RC2wrap id-smime-alg 5 : id-smime-alg-ESDH id-smime-alg 6 : id-smime-alg-CMS3DESwrap id-smime-alg 7 : id-smime-alg-CMSRC2wrap +id-smime-alg 9 : id-alg-PWRI-KEK # S/MIME Certificate Distribution id-smime-cd 1 : id-smime-cd-ldap @@ -770,6 +775,10 @@ id-ce 55 : targetInformation : X509v3 AC Targeting !Cname no-rev-avail id-ce 56 : noRevAvail : X509v3 No Revocation Available +# From RFC5280 +ext-key-usage 0 : anyExtendedKeyUsage : Any Extended Key Usage + + !Cname netscape 2 16 840 1 113730 : Netscape : Netscape Communications Corp. !Cname netscape-cert-extension @@ -846,6 +855,10 @@ aes 2 : AES-128-CBC : aes-128-cbc aes 3 : AES-128-OFB : aes-128-ofb !Cname aes-128-cfb128 aes 4 : AES-128-CFB : aes-128-cfb +aes 5 : id-aes128-wrap +aes 6 : id-aes128-GCM : aes-128-gcm +aes 7 : id-aes128-CCM : aes-128-ccm +aes 8 : id-aes128-wrap-pad aes 21 : AES-192-ECB : aes-192-ecb aes 22 : AES-192-CBC : aes-192-cbc @@ -853,6 +866,10 @@ aes 22 : AES-192-CBC : aes-192-cbc aes 23 : AES-192-OFB : aes-192-ofb !Cname aes-192-cfb128 aes 24 : AES-192-CFB : aes-192-cfb +aes 25 : id-aes192-wrap +aes 26 : id-aes192-GCM : aes-192-gcm +aes 27 : id-aes192-CCM : aes-192-ccm +aes 28 : id-aes192-wrap-pad aes 41 : AES-256-ECB : aes-256-ecb aes 42 : AES-256-CBC : aes-256-cbc @@ -860,6 +877,10 @@ aes 42 : AES-256-CBC : aes-256-cbc aes 43 : AES-256-OFB : aes-256-ofb !Cname aes-256-cfb128 aes 44 : AES-256-CFB : aes-256-cfb +aes 45 : id-aes256-wrap +aes 46 : id-aes256-GCM : aes-256-gcm +aes 47 : id-aes256-CCM : aes-256-ccm +aes 48 : id-aes256-wrap-pad # There are no OIDs for these modes... @@ -869,15 +890,16 @@ aes 44 : AES-256-CFB : aes-256-cfb : AES-128-CFB8 : aes-128-cfb8 : AES-192-CFB8 : aes-192-cfb8 : AES-256-CFB8 : aes-256-cfb8 + : AES-128-CTR : aes-128-ctr + : AES-192-CTR : aes-192-ctr + : AES-256-CTR : aes-256-ctr + : AES-128-XTS : aes-128-xts + : AES-256-XTS : aes-256-xts : DES-CFB1 : des-cfb1 : DES-CFB8 : des-cfb8 : DES-EDE3-CFB1 : des-ede3-cfb1 : DES-EDE3-CFB8 : des-ede3-cfb8 -aes 5 : id-aes128-wrap -aes 25 : id-aes192-wrap -aes 45 : id-aes256-wrap - # OIDs for SHA224, SHA256, SHA385 and SHA512, according to x9.84. !Alias nist_hashalgs nistAlgorithms 2 nist_hashalgs 1 : SHA256 : sha256 @@ -1211,6 +1233,9 @@ cryptocom 1 8 1 : id-GostR3410-2001-ParamSet-cc : GOST R 3410-2001 Parameter Se 1 2 392 200011 61 1 1 1 2 : CAMELLIA-128-CBC : camellia-128-cbc 1 2 392 200011 61 1 1 1 3 : CAMELLIA-192-CBC : camellia-192-cbc 1 2 392 200011 61 1 1 1 4 : CAMELLIA-256-CBC : camellia-256-cbc +1 2 392 200011 61 1 1 3 2 : id-camellia128-wrap +1 2 392 200011 61 1 1 3 3 : id-camellia192-wrap +1 2 392 200011 61 1 1 3 4 : id-camellia256-wrap # Definitions for Camellia cipher - ECB, CFB, OFB MODE @@ -1257,3 +1282,11 @@ kisa 1 6 : SEED-OFB : seed-ofb # There is no OID that just denotes "HMAC" oddly enough... : HMAC : hmac +# Nor CMAC either + : CMAC : cmac + +# Synthetic composite ciphersuites + : RC4-HMAC-MD5 : rc4-hmac-md5 + : AES-128-CBC-HMAC-SHA1 : aes-128-cbc-hmac-sha1 + : AES-192-CBC-HMAC-SHA1 : aes-192-cbc-hmac-sha1 + : AES-256-CBC-HMAC-SHA1 : aes-256-cbc-hmac-sha1 diff --git a/app/openssl/crypto/ocsp/ocsp.h b/app/openssl/crypto/ocsp/ocsp.h index 31e45744..f14e9f7e 100644 --- a/app/openssl/crypto/ocsp/ocsp.h +++ b/app/openssl/crypto/ocsp/ocsp.h @@ -90,6 +90,13 @@ extern "C" { #define OCSP_RESPID_KEY 0x400 #define OCSP_NOTIME 0x800 +#ifdef OPENSSL_SYS_WIN32 + /* Under Win32 these are defined in wincrypt.h */ +#undef OCSP_REQUEST +#undef X509_NAME +#undef OCSP_RESPONSE +#endif + /* CertID ::= SEQUENCE { * hashAlgorithm AlgorithmIdentifier, * issuerNameHash OCTET STRING, -- Hash of Issuer's DN diff --git a/app/openssl/crypto/ocsp/ocsp_lib.c b/app/openssl/crypto/ocsp/ocsp_lib.c index e92b86c0..a94dc838 100644 --- a/app/openssl/crypto/ocsp/ocsp_lib.c +++ b/app/openssl/crypto/ocsp/ocsp_lib.c @@ -124,7 +124,8 @@ OCSP_CERTID *OCSP_cert_id_new(const EVP_MD *dgst, if (!(ASN1_OCTET_STRING_set(cid->issuerNameHash, md, i))) goto err; /* Calculate the issuerKey hash, excluding tag and length */ - EVP_Digest(issuerKey->data, issuerKey->length, md, &i, dgst, NULL); + if (!EVP_Digest(issuerKey->data, issuerKey->length, md, &i, dgst, NULL)) + goto err; if (!(ASN1_OCTET_STRING_set(cid->issuerKeyHash, md, i))) goto err; diff --git a/app/openssl/crypto/ocsp/ocsp_vfy.c b/app/openssl/crypto/ocsp/ocsp_vfy.c index 415d67e6..27671830 100644 --- a/app/openssl/crypto/ocsp/ocsp_vfy.c +++ b/app/openssl/crypto/ocsp/ocsp_vfy.c @@ -91,9 +91,12 @@ int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, { EVP_PKEY *skey; skey = X509_get_pubkey(signer); - ret = OCSP_BASICRESP_verify(bs, skey, 0); - EVP_PKEY_free(skey); - if(ret <= 0) + if (skey) + { + ret = OCSP_BASICRESP_verify(bs, skey, 0); + EVP_PKEY_free(skey); + } + if(!skey || ret <= 0) { OCSPerr(OCSP_F_OCSP_BASIC_VERIFY, OCSP_R_SIGNATURE_FAILURE); goto end; @@ -108,6 +111,7 @@ int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, init_res = X509_STORE_CTX_init(&ctx, st, signer, bs->certs); if(!init_res) { + ret = -1; OCSPerr(OCSP_F_OCSP_BASIC_VERIFY,ERR_R_X509_LIB); goto end; } diff --git a/app/openssl/crypto/opensslconf-32.h b/app/openssl/crypto/opensslconf-32.h new file mode 100644 index 00000000..d6625489 --- /dev/null +++ b/app/openssl/crypto/opensslconf-32.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#define BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf-64.h b/app/openssl/crypto/opensslconf-64.h new file mode 100644 index 00000000..70c5a2cb --- /dev/null +++ b/app/openssl/crypto/opensslconf-64.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf-static-32.h b/app/openssl/crypto/opensslconf-static-32.h new file mode 100644 index 00000000..d6625489 --- /dev/null +++ b/app/openssl/crypto/opensslconf-static-32.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#define BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf-static-64.h b/app/openssl/crypto/opensslconf-static-64.h new file mode 100644 index 00000000..70c5a2cb --- /dev/null +++ b/app/openssl/crypto/opensslconf-static-64.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf-static-trusty.h b/app/openssl/crypto/opensslconf-static-trusty.h new file mode 100644 index 00000000..06f9f982 --- /dev/null +++ b/app/openssl/crypto/opensslconf-static-trusty.h @@ -0,0 +1,448 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_CMS +# define OPENSSL_NO_CMS +#endif +#ifndef OPENSSL_NO_COMP +# define OPENSSL_NO_COMP +#endif +#ifndef OPENSSL_NO_CONF +# define OPENSSL_NO_CONF +#endif +#ifndef OPENSSL_NO_DES +# define OPENSSL_NO_DES +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_LOCKING +# define OPENSSL_NO_LOCKING +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MD4 +# define OPENSSL_NO_MD4 +#endif +#ifndef OPENSSL_NO_MD5 +# define OPENSSL_NO_MD5 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_OCSP +# define OPENSSL_NO_OCSP +#endif +#ifndef OPENSSL_NO_PEM +# define OPENSSL_NO_PEM +#endif +#ifndef OPENSSL_NO_PKCS12 +# define OPENSSL_NO_PKCS12 +#endif +#ifndef OPENSSL_NO_PQUEUE +# define OPENSSL_NO_PQUEUE +#endif +#ifndef OPENSSL_NO_RC2 +# define OPENSSL_NO_RC2 +#endif +#ifndef OPENSSL_NO_RC4 +# define OPENSSL_NO_RC4 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_SRP +# define OPENSSL_NO_SRP +#endif +#ifndef OPENSSL_NO_SSL2 +# define OPENSSL_NO_SSL2 +#endif +#ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_TLS1 +# define OPENSSL_NO_TLS1 +#endif +#ifndef OPENSSL_NO_TLSEXT +# define OPENSSL_NO_TLSEXT +#endif +#ifndef OPENSSL_NO_TS +# define OPENSSL_NO_TS +#endif +#ifndef OPENSSL_NO_TXT_DB +# define OPENSSL_NO_TXT_DB +#endif +#ifndef OPENSSL_NO_UI +# define OPENSSL_NO_UI +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_NO_ERR +# define OPENSSL_NO_ERR +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS) +# define NO_CMS +# endif +# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP) +# define NO_COMP +# endif +# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF) +# define NO_CONF +# endif +# if defined(OPENSSL_NO_DES) && !defined(NO_DES) +# define NO_DES +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING) +# define NO_LOCKING +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4) +# define NO_MD4 +# endif +# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5) +# define NO_MD5 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP) +# define NO_OCSP +# endif +# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM) +# define NO_PEM +# endif +# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12) +# define NO_PKCS12 +# endif +# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE) +# define NO_PQUEUE +# endif +# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2) +# define NO_RC2 +# endif +# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4) +# define NO_RC4 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP) +# define NO_SRP +# endif +# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2) +# define NO_SSL2 +# endif +# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3) +# define NO_SSL3 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1) +# define NO_TLS1 +# endif +# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT) +# define NO_TLSEXT +# endif +# if defined(OPENSSL_NO_TS) && !defined(NO_TS) +# define NO_TS +# endif +# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB) +# define NO_TXT_DB +# endif +# if defined(OPENSSL_NO_UI) && !defined(NO_UI) +# define NO_UI +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned int +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#undef RC4_CHUNK +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned long +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#undef BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#undef DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf-static.h b/app/openssl/crypto/opensslconf-static.h new file mode 100644 index 00000000..f63a6e0a --- /dev/null +++ b/app/openssl/crypto/opensslconf-static.h @@ -0,0 +1,6 @@ +// Auto-generated - DO NOT EDIT! +#if defined(__LP64__) +#include "opensslconf-static-64.h" +#else +#include "opensslconf-static-32.h" +#endif diff --git a/app/openssl/crypto/opensslconf-trusty.h b/app/openssl/crypto/opensslconf-trusty.h new file mode 100644 index 00000000..06f9f982 --- /dev/null +++ b/app/openssl/crypto/opensslconf-trusty.h @@ -0,0 +1,448 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_CMS +# define OPENSSL_NO_CMS +#endif +#ifndef OPENSSL_NO_COMP +# define OPENSSL_NO_COMP +#endif +#ifndef OPENSSL_NO_CONF +# define OPENSSL_NO_CONF +#endif +#ifndef OPENSSL_NO_DES +# define OPENSSL_NO_DES +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_LOCKING +# define OPENSSL_NO_LOCKING +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MD4 +# define OPENSSL_NO_MD4 +#endif +#ifndef OPENSSL_NO_MD5 +# define OPENSSL_NO_MD5 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_OCSP +# define OPENSSL_NO_OCSP +#endif +#ifndef OPENSSL_NO_PEM +# define OPENSSL_NO_PEM +#endif +#ifndef OPENSSL_NO_PKCS12 +# define OPENSSL_NO_PKCS12 +#endif +#ifndef OPENSSL_NO_PQUEUE +# define OPENSSL_NO_PQUEUE +#endif +#ifndef OPENSSL_NO_RC2 +# define OPENSSL_NO_RC2 +#endif +#ifndef OPENSSL_NO_RC4 +# define OPENSSL_NO_RC4 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_SRP +# define OPENSSL_NO_SRP +#endif +#ifndef OPENSSL_NO_SSL2 +# define OPENSSL_NO_SSL2 +#endif +#ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_TLS1 +# define OPENSSL_NO_TLS1 +#endif +#ifndef OPENSSL_NO_TLSEXT +# define OPENSSL_NO_TLSEXT +#endif +#ifndef OPENSSL_NO_TS +# define OPENSSL_NO_TS +#endif +#ifndef OPENSSL_NO_TXT_DB +# define OPENSSL_NO_TXT_DB +#endif +#ifndef OPENSSL_NO_UI +# define OPENSSL_NO_UI +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_NO_ERR +# define OPENSSL_NO_ERR +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS) +# define NO_CMS +# endif +# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP) +# define NO_COMP +# endif +# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF) +# define NO_CONF +# endif +# if defined(OPENSSL_NO_DES) && !defined(NO_DES) +# define NO_DES +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING) +# define NO_LOCKING +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4) +# define NO_MD4 +# endif +# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5) +# define NO_MD5 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP) +# define NO_OCSP +# endif +# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM) +# define NO_PEM +# endif +# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12) +# define NO_PKCS12 +# endif +# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE) +# define NO_PQUEUE +# endif +# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2) +# define NO_RC2 +# endif +# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4) +# define NO_RC4 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP) +# define NO_SRP +# endif +# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2) +# define NO_SSL2 +# endif +# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3) +# define NO_SSL3 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1) +# define NO_TLS1 +# endif +# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT) +# define NO_TLSEXT +# endif +# if defined(OPENSSL_NO_TS) && !defined(NO_TS) +# define NO_TS +# endif +# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB) +# define NO_TXT_DB +# endif +# if defined(OPENSSL_NO_UI) && !defined(NO_UI) +# define NO_UI +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned int +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#undef RC4_CHUNK +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned long +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#undef BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#undef DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslconf.h b/app/openssl/crypto/opensslconf.h index 26ac6ba3..94212a08 100644 --- a/app/openssl/crypto/opensslconf.h +++ b/app/openssl/crypto/opensslconf.h @@ -1,250 +1,10 @@ -/* opensslconf.h */ -/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ - -/* OpenSSL was configured with the following options: */ -#ifndef OPENSSL_DOING_MAKEDEPEND - - -#ifndef OPENSSL_NO_CAST -# define OPENSSL_NO_CAST +// Auto-generated - DO NOT EDIT! +#ifndef OPENSSL_SYS_TRUSTY +#if defined(__LP64__) +#include "opensslconf-64.h" +#else +#include "opensslconf-32.h" #endif -#ifndef OPENSSL_NO_GMP -# define OPENSSL_NO_GMP +#else +#include "opensslconf-trusty.h" #endif -#ifndef OPENSSL_NO_IDEA -# define OPENSSL_NO_IDEA -#endif -#ifndef OPENSSL_NO_JPAKE -# define OPENSSL_NO_JPAKE -#endif -#ifndef OPENSSL_NO_KRB5 -# define OPENSSL_NO_KRB5 -#endif -#ifndef OPENSSL_NO_MD2 -# define OPENSSL_NO_MD2 -#endif -#ifndef OPENSSL_NO_RC5 -# define OPENSSL_NO_RC5 -#endif -#ifndef OPENSSL_NO_RFC3779 -# define OPENSSL_NO_RFC3779 -#endif -#ifndef OPENSSL_NO_SEED -# define OPENSSL_NO_SEED -#endif -#ifndef OPENSSL_NO_SHA0 -# define OPENSSL_NO_SHA0 -#endif -#ifndef OPENSSL_NO_STORE -# define OPENSSL_NO_STORE -#endif -#ifndef OPENSSL_NO_WHRLPOOL -# define OPENSSL_NO_WHRLPOOL -#endif - -#endif /* OPENSSL_DOING_MAKEDEPEND */ - -#ifndef OPENSSL_THREADS -# define OPENSSL_THREADS -#endif -#ifndef OPENSSL_NO_DYNAMIC_ENGINE -# define OPENSSL_NO_DYNAMIC_ENGINE -#endif - -/* The OPENSSL_NO_* macros are also defined as NO_* if the application - asks for it. This is a transient feature that is provided for those - who haven't had the time to do the appropriate changes in their - applications. */ -#ifdef OPENSSL_ALGORITHM_DEFINES -# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) -# define NO_CAST -# endif -# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) -# define NO_GMP -# endif -# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) -# define NO_IDEA -# endif -# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) -# define NO_JPAKE -# endif -# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) -# define NO_KRB5 -# endif -# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) -# define NO_MD2 -# endif -# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) -# define NO_RC5 -# endif -# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) -# define NO_RFC3779 -# endif -# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) -# define NO_SEED -# endif -# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) -# define NO_SHA0 -# endif -# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) -# define NO_STORE -# endif -# if defined(OPENSSL_NO_WHRLPOOL) && !defined(NO_WHRLPOOL) -# define NO_WHRLPOOL -# endif -#endif - -/* crypto/opensslconf.h.in */ - -/* Generate 80386 code? */ -#undef I386_ONLY - -#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ -#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) -#define ENGINESDIR "/usr/local/ssl/lib/engines" -#define OPENSSLDIR "/usr/local/ssl" -#endif -#endif - -#undef OPENSSL_UNISTD -#define OPENSSL_UNISTD - -#undef OPENSSL_EXPORT_VAR_AS_FUNCTION - -#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) -#define IDEA_INT unsigned int -#endif - -#if defined(HEADER_MD2_H) && !defined(MD2_INT) -#define MD2_INT unsigned int -#endif - -#if defined(HEADER_RC2_H) && !defined(RC2_INT) -/* I need to put in a mod for the alpha - eay */ -#define RC2_INT unsigned int -#endif - -#if defined(HEADER_RC4_H) -#if !defined(RC4_INT) -/* using int types make the structure larger but make the code faster - * on most boxes I have tested - up to %20 faster. */ -/* - * I don't know what does "most" mean, but declaring "int" is a must on: - * - Intel P6 because partial register stalls are very expensive; - * - elder Alpha because it lacks byte load/store instructions; - */ -#define RC4_INT unsigned char -#endif -#if !defined(RC4_CHUNK) -/* - * This enables code handling data aligned at natural CPU word - * boundary. See crypto/rc4/rc4_enc.c for further details. - */ -#define RC4_CHUNK unsigned long -#endif -#endif - -#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) -/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a - * %20 speed up (longs are 8 bytes, int's are 4). */ -#ifndef DES_LONG -#define DES_LONG unsigned int -#endif -#endif - -#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) -#define CONFIG_HEADER_BN_H -#define BN_LLONG - -/* Should we define BN_DIV2W here? */ - -/* Only one for the following should be defined */ -#undef SIXTY_FOUR_BIT_LONG -#undef SIXTY_FOUR_BIT -#define THIRTY_TWO_BIT -#endif - -#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) -#define CONFIG_HEADER_RC4_LOCL_H -/* if this is defined data[i] is used instead of *data, this is a %20 - * speedup on x86 */ -#undef RC4_INDEX -#endif - -#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) -#define CONFIG_HEADER_BF_LOCL_H -#define BF_PTR -#endif /* HEADER_BF_LOCL_H */ - -#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) -#define CONFIG_HEADER_DES_LOCL_H -#ifndef DES_DEFAULT_OPTIONS -/* the following is tweaked from a config script, that is why it is a - * protected undef/define */ -#ifndef DES_PTR -#undef DES_PTR -#endif - -/* This helps C compiler generate the correct code for multiple functional - * units. It reduces register dependancies at the expense of 2 more - * registers */ -#ifndef DES_RISC1 -#undef DES_RISC1 -#endif - -#ifndef DES_RISC2 -#undef DES_RISC2 -#endif - -#if defined(DES_RISC1) && defined(DES_RISC2) -YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! -#endif - -/* Unroll the inner loop, this sometimes helps, sometimes hinders. - * Very mucy CPU dependant */ -#ifndef DES_UNROLL -#define DES_UNROLL -#endif - -/* These default values were supplied by - * Peter Gutman - * They are only used if nothing else has been defined */ -#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) -/* Special defines which change the way the code is built depending on the - CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find - even newer MIPS CPU's, but at the moment one size fits all for - optimization options. Older Sparc's work better with only UNROLL, but - there's no way to tell at compile time what it is you're running on */ - -#if defined( sun ) /* Newer Sparc's */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#elif defined( __ultrix ) /* Older MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined( __osf1__ ) /* Alpha */ -# define DES_PTR -# define DES_RISC2 -#elif defined ( _AIX ) /* RS6000 */ - /* Unknown */ -#elif defined( __hpux ) /* HP-PA */ - /* Unknown */ -#elif defined( __aux ) /* 68K */ - /* Unknown */ -#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ -# define DES_UNROLL -#elif defined( __sgi ) /* Newer MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#endif /* Systems-specific speed defines */ -#endif - -#endif /* DES_DEFAULT_OPTIONS */ -#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/crypto/opensslv.h b/app/openssl/crypto/opensslv.h index 310a3387..ebe71807 100644 --- a/app/openssl/crypto/opensslv.h +++ b/app/openssl/crypto/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000005fL +#define OPENSSL_VERSION_NUMBER 0x1000107fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0e-fips 6 Sep 2011" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0e 6 Sep 2011" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/crypto/ossl_typ.h b/app/openssl/crypto/ossl_typ.h index 12bd7014..ea9227f6 100644 --- a/app/openssl/crypto/ossl_typ.h +++ b/app/openssl/crypto/ossl_typ.h @@ -91,10 +91,12 @@ typedef struct asn1_string_st ASN1_TIME; typedef struct asn1_string_st ASN1_GENERALIZEDTIME; typedef struct asn1_string_st ASN1_VISIBLESTRING; typedef struct asn1_string_st ASN1_UTF8STRING; +typedef struct asn1_string_st ASN1_STRING; typedef int ASN1_BOOLEAN; typedef int ASN1_NULL; #endif +typedef struct ASN1_ITEM_st ASN1_ITEM; typedef struct asn1_pctx_st ASN1_PCTX; #ifdef OPENSSL_SYS_WIN32 diff --git a/app/openssl/crypto/pariscid.pl b/app/openssl/crypto/pariscid.pl new file mode 100644 index 00000000..bfc56fdc --- /dev/null +++ b/app/openssl/crypto/pariscid.pl @@ -0,0 +1,225 @@ +#!/usr/bin/env perl + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $ST ="std"; +} else { + $LEVEL ="1.1"; + $SIZE_T =4; + $ST ="stw"; +} + +$rp="%r2"; +$sp="%r30"; +$rv="%r28"; + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT OPENSSL_cpuid_setup,ENTRY + .ALIGN 8 +OPENSSL_cpuid_setup + .PROC + .CALLINFO NO_CALLS + .ENTRY + bv ($rp) + .EXIT + nop + .PROCEND + + .EXPORT OPENSSL_rdtsc,ENTRY + .ALIGN 8 +OPENSSL_rdtsc + .PROC + .CALLINFO NO_CALLS + .ENTRY + mfctl %cr16,$rv + bv ($rp) + .EXIT + nop + .PROCEND + + .EXPORT OPENSSL_wipe_cpu,ENTRY + .ALIGN 8 +OPENSSL_wipe_cpu + .PROC + .CALLINFO NO_CALLS + .ENTRY + xor %r0,%r0,%r1 + fcpy,dbl %fr0,%fr4 + xor %r0,%r0,%r19 + fcpy,dbl %fr0,%fr5 + xor %r0,%r0,%r20 + fcpy,dbl %fr0,%fr6 + xor %r0,%r0,%r21 + fcpy,dbl %fr0,%fr7 + xor %r0,%r0,%r22 + fcpy,dbl %fr0,%fr8 + xor %r0,%r0,%r23 + fcpy,dbl %fr0,%fr9 + xor %r0,%r0,%r24 + fcpy,dbl %fr0,%fr10 + xor %r0,%r0,%r25 + fcpy,dbl %fr0,%fr11 + xor %r0,%r0,%r26 + fcpy,dbl %fr0,%fr22 + xor %r0,%r0,%r29 + fcpy,dbl %fr0,%fr23 + xor %r0,%r0,%r31 + fcpy,dbl %fr0,%fr24 + fcpy,dbl %fr0,%fr25 + fcpy,dbl %fr0,%fr26 + fcpy,dbl %fr0,%fr27 + fcpy,dbl %fr0,%fr28 + fcpy,dbl %fr0,%fr29 + fcpy,dbl %fr0,%fr30 + fcpy,dbl %fr0,%fr31 + bv ($rp) + .EXIT + ldo 0($sp),$rv + .PROCEND +___ +{ +my $inp="%r26"; +my $len="%r25"; + +$code.=<<___; + .EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 8 +OPENSSL_cleanse + .PROC + .CALLINFO NO_CALLS + .ENTRY + cmpib,*= 0,$len,L\$done + nop + cmpib,*>>= 15,$len,L\$ittle + ldi $SIZE_T-1,%r1 + +L\$align + and,*<> $inp,%r1,%r28 + b,n L\$aligned + stb %r0,0($inp) + ldo -1($len),$len + b L\$align + ldo 1($inp),$inp + +L\$aligned + andcm $len,%r1,%r28 +L\$ot + $ST %r0,0($inp) + addib,*<> -$SIZE_T,%r28,L\$ot + ldo $SIZE_T($inp),$inp + + and,*<> $len,%r1,$len + b,n L\$done +L\$ittle + stb %r0,0($inp) + addib,*<> -1,$len,L\$ittle + ldo 1($inp),$inp +L\$done + bv ($rp) + .EXIT + nop + .PROCEND +___ +} +{ +my ($out,$cnt,$max)=("%r26","%r25","%r24"); +my ($tick,$lasttick)=("%r23","%r22"); +my ($diff,$lastdiff)=("%r21","%r20"); + +$code.=<<___; + .EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 8 +OPENSSL_instrument_bus + .PROC + .CALLINFO NO_CALLS + .ENTRY + copy $cnt,$rv + mfctl %cr16,$tick + copy $tick,$lasttick + ldi 0,$diff + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) +L\$oop + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + addib,<> -1,$cnt,L\$oop + addi 4,$out,$out + + bv ($rp) + .EXIT + sub $rv,$cnt,$rv + .PROCEND + + .EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR + .ALIGN 8 +OPENSSL_instrument_bus2 + .PROC + .CALLINFO NO_CALLS + .ENTRY + copy $cnt,$rv + sub %r0,$cnt,$cnt + + mfctl %cr16,$tick + copy $tick,$lasttick + ldi 0,$diff + + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick +L\$oop2 + copy $diff,$lastdiff + fdc 0($out) + ldw 0($out),$tick + add $diff,$tick,$tick + stw $tick,0($out) + + addib,= -1,$max,L\$done2 + nop + + mfctl %cr16,$tick + sub $tick,$lasttick,$diff + copy $tick,$lasttick + cmpclr,<> $lastdiff,$diff,$tick + ldi 1,$tick + + ldi 1,%r1 + xor %r1,$tick,$tick + addb,<> $tick,$cnt,L\$oop2 + shladd,l $tick,2,$out,$out +L\$done2 + bv ($rp) + .EXIT + add $rv,$cnt,$rv + .PROCEND +___ +} +$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); +$code =~ s/,\*/,/gm if ($SIZE_T==4); +$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); +print $code; +close STDOUT; + diff --git a/app/openssl/crypto/pem/pem_all.c b/app/openssl/crypto/pem/pem_all.c index 3e7a6093..eac0460e 100644 --- a/app/openssl/crypto/pem/pem_all.c +++ b/app/openssl/crypto/pem/pem_all.c @@ -193,7 +193,61 @@ RSA *PEM_read_RSAPrivateKey(FILE *fp, RSA **rsa, pem_password_cb *cb, #endif +#ifdef OPENSSL_FIPS + +int PEM_write_bio_RSAPrivateKey(BIO *bp, RSA *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + EVP_PKEY_set1_RSA(k, x); + + ret = PEM_write_bio_PrivateKey(bp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write_bio((i2d_of_void *)i2d_RSAPrivateKey, + PEM_STRING_RSA,bp,x,enc,kstr,klen,cb,u); +} + +#ifndef OPENSSL_NO_FP_API +int PEM_write_RSAPrivateKey(FILE *fp, RSA *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + + EVP_PKEY_set1_RSA(k, x); + + ret = PEM_write_PrivateKey(fp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write((i2d_of_void *)i2d_RSAPrivateKey, + PEM_STRING_RSA,fp,x,enc,kstr,klen,cb,u); +} +#endif + +#else + IMPLEMENT_PEM_write_cb_const(RSAPrivateKey, RSA, PEM_STRING_RSA, RSAPrivateKey) + +#endif + IMPLEMENT_PEM_rw_const(RSAPublicKey, RSA, PEM_STRING_RSA_PUBLIC, RSAPublicKey) IMPLEMENT_PEM_rw(RSA_PUBKEY, RSA, PEM_STRING_PUBLIC, RSA_PUBKEY) @@ -223,7 +277,59 @@ DSA *PEM_read_bio_DSAPrivateKey(BIO *bp, DSA **dsa, pem_password_cb *cb, return pkey_get_dsa(pktmp, dsa); /* will free pktmp */ } +#ifdef OPENSSL_FIPS + +int PEM_write_bio_DSAPrivateKey(BIO *bp, DSA *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + EVP_PKEY_set1_DSA(k, x); + + ret = PEM_write_bio_PrivateKey(bp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write_bio((i2d_of_void *)i2d_DSAPrivateKey, + PEM_STRING_DSA,bp,x,enc,kstr,klen,cb,u); +} + +#ifndef OPENSSL_NO_FP_API +int PEM_write_DSAPrivateKey(FILE *fp, DSA *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + EVP_PKEY_set1_DSA(k, x); + ret = PEM_write_PrivateKey(fp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write((i2d_of_void *)i2d_DSAPrivateKey, + PEM_STRING_DSA,fp,x,enc,kstr,klen,cb,u); +} +#endif + +#else + IMPLEMENT_PEM_write_cb_const(DSAPrivateKey, DSA, PEM_STRING_DSA, DSAPrivateKey) + +#endif + IMPLEMENT_PEM_rw(DSA_PUBKEY, DSA, PEM_STRING_PUBLIC, DSA_PUBKEY) #ifndef OPENSSL_NO_FP_API @@ -269,8 +375,63 @@ EC_KEY *PEM_read_bio_ECPrivateKey(BIO *bp, EC_KEY **key, pem_password_cb *cb, IMPLEMENT_PEM_rw_const(ECPKParameters, EC_GROUP, PEM_STRING_ECPARAMETERS, ECPKParameters) + + +#ifdef OPENSSL_FIPS + +int PEM_write_bio_ECPrivateKey(BIO *bp, EC_KEY *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + EVP_PKEY_set1_EC_KEY(k, x); + + ret = PEM_write_bio_PrivateKey(bp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write_bio((i2d_of_void *)i2d_ECPrivateKey, + PEM_STRING_ECPRIVATEKEY, + bp,x,enc,kstr,klen,cb,u); +} + +#ifndef OPENSSL_NO_FP_API +int PEM_write_ECPrivateKey(FILE *fp, EC_KEY *x, const EVP_CIPHER *enc, + unsigned char *kstr, int klen, + pem_password_cb *cb, void *u) +{ + if (FIPS_mode()) + { + EVP_PKEY *k; + int ret; + k = EVP_PKEY_new(); + if (!k) + return 0; + EVP_PKEY_set1_EC_KEY(k, x); + ret = PEM_write_PrivateKey(fp, k, enc, kstr, klen, cb, u); + EVP_PKEY_free(k); + return ret; + } + else + return PEM_ASN1_write((i2d_of_void *)i2d_ECPrivateKey, + PEM_STRING_ECPRIVATEKEY, + fp,x,enc,kstr,klen,cb,u); +} +#endif + +#else + IMPLEMENT_PEM_write_cb(ECPrivateKey, EC_KEY, PEM_STRING_ECPRIVATEKEY, ECPrivateKey) +#endif + IMPLEMENT_PEM_rw(EC_PUBKEY, EC_KEY, PEM_STRING_PUBLIC, EC_PUBKEY) #ifndef OPENSSL_NO_FP_API diff --git a/app/openssl/crypto/pem/pem_info.c b/app/openssl/crypto/pem/pem_info.c index 1b2be527..cc7f24a9 100644 --- a/app/openssl/crypto/pem/pem_info.c +++ b/app/openssl/crypto/pem/pem_info.c @@ -167,6 +167,7 @@ start: #ifndef OPENSSL_NO_RSA if (strcmp(name,PEM_STRING_RSA) == 0) { + d2i=(D2I_OF(void))d2i_RSAPrivateKey; if (xi->x_pkey != NULL) { if (!sk_X509_INFO_push(ret,xi)) goto err; diff --git a/app/openssl/crypto/pem/pem_lib.c b/app/openssl/crypto/pem/pem_lib.c index cfc89a99..5a421fc4 100644 --- a/app/openssl/crypto/pem/pem_lib.c +++ b/app/openssl/crypto/pem/pem_lib.c @@ -394,7 +394,8 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, goto err; /* The 'iv' is used as the iv and as a salt. It is * NOT taken from the BytesToKey function */ - EVP_BytesToKey(enc,EVP_md5(),iv,kstr,klen,1,key,NULL); + if (!EVP_BytesToKey(enc,EVP_md5(),iv,kstr,klen,1,key,NULL)) + goto err; if (kstr == (unsigned char *)buf) OPENSSL_cleanse(buf,PEM_BUFSIZE); @@ -406,12 +407,15 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, /* k=strlen(buf); */ EVP_CIPHER_CTX_init(&ctx); - EVP_EncryptInit_ex(&ctx,enc,NULL,key,iv); - EVP_EncryptUpdate(&ctx,data,&j,data,i); - EVP_EncryptFinal_ex(&ctx,&(data[j]),&i); + ret = 1; + if (!EVP_EncryptInit_ex(&ctx,enc,NULL,key,iv) + || !EVP_EncryptUpdate(&ctx,data,&j,data,i) + || !EVP_EncryptFinal_ex(&ctx,&(data[j]),&i)) + ret = 0; EVP_CIPHER_CTX_cleanup(&ctx); + if (ret == 0) + goto err; i+=j; - ret=1; } else { @@ -459,14 +463,17 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, ebcdic2ascii(buf, buf, klen); #endif - EVP_BytesToKey(cipher->cipher,EVP_md5(),&(cipher->iv[0]), - (unsigned char *)buf,klen,1,key,NULL); + if (!EVP_BytesToKey(cipher->cipher,EVP_md5(),&(cipher->iv[0]), + (unsigned char *)buf,klen,1,key,NULL)) + return 0; j=(int)len; EVP_CIPHER_CTX_init(&ctx); - EVP_DecryptInit_ex(&ctx,cipher->cipher,NULL, key,&(cipher->iv[0])); - EVP_DecryptUpdate(&ctx,data,&i,data,j); - o=EVP_DecryptFinal_ex(&ctx,&(data[i]),&j); + o = EVP_DecryptInit_ex(&ctx,cipher->cipher,NULL, key,&(cipher->iv[0])); + if (o) + o = EVP_DecryptUpdate(&ctx,data,&i,data,j); + if (o) + o = EVP_DecryptFinal_ex(&ctx,&(data[i]),&j); EVP_CIPHER_CTX_cleanup(&ctx); OPENSSL_cleanse((char *)buf,sizeof(buf)); OPENSSL_cleanse((char *)key,sizeof(key)); diff --git a/app/openssl/crypto/pem/pem_seal.c b/app/openssl/crypto/pem/pem_seal.c index 59690b56..b6b4e134 100644 --- a/app/openssl/crypto/pem/pem_seal.c +++ b/app/openssl/crypto/pem/pem_seal.c @@ -96,7 +96,8 @@ int PEM_SealInit(PEM_ENCODE_SEAL_CTX *ctx, EVP_CIPHER *type, EVP_MD *md_type, EVP_EncodeInit(&ctx->encode); EVP_MD_CTX_init(&ctx->md); - EVP_SignInit(&ctx->md,md_type); + if (!EVP_SignInit(&ctx->md,md_type)) + goto err; EVP_CIPHER_CTX_init(&ctx->cipher); ret=EVP_SealInit(&ctx->cipher,type,ek,ekl,iv,pubk,npubk); @@ -163,7 +164,8 @@ int PEM_SealFinal(PEM_ENCODE_SEAL_CTX *ctx, unsigned char *sig, int *sigl, goto err; } - EVP_EncryptFinal_ex(&ctx->cipher,s,(int *)&i); + if (!EVP_EncryptFinal_ex(&ctx->cipher,s,(int *)&i)) + goto err; EVP_EncodeUpdate(&ctx->encode,out,&j,s,i); *outl=j; out+=j; diff --git a/app/openssl/crypto/pem/pvkfmt.c b/app/openssl/crypto/pem/pvkfmt.c index 5f130c45..b1bf71a5 100644 --- a/app/openssl/crypto/pem/pvkfmt.c +++ b/app/openssl/crypto/pem/pvkfmt.c @@ -709,13 +709,16 @@ static int derive_pvk_key(unsigned char *key, const unsigned char *pass, int passlen) { EVP_MD_CTX mctx; + int rv = 1; EVP_MD_CTX_init(&mctx); - EVP_DigestInit_ex(&mctx, EVP_sha1(), NULL); - EVP_DigestUpdate(&mctx, salt, saltlen); - EVP_DigestUpdate(&mctx, pass, passlen); - EVP_DigestFinal_ex(&mctx, key, NULL); + if (!EVP_DigestInit_ex(&mctx, EVP_sha1(), NULL) + || !EVP_DigestUpdate(&mctx, salt, saltlen) + || !EVP_DigestUpdate(&mctx, pass, passlen) + || !EVP_DigestFinal_ex(&mctx, key, NULL)) + rv = 0; + EVP_MD_CTX_cleanup(&mctx); - return 1; + return rv; } @@ -727,11 +730,12 @@ static EVP_PKEY *do_PVK_body(const unsigned char **in, const unsigned char *p = *in; unsigned int magic; unsigned char *enctmp = NULL, *q; + EVP_CIPHER_CTX cctx; + EVP_CIPHER_CTX_init(&cctx); if (saltlen) { char psbuf[PEM_BUFSIZE]; unsigned char keybuf[20]; - EVP_CIPHER_CTX cctx; int enctmplen, inlen; if (cb) inlen=cb(psbuf,PEM_BUFSIZE,0,u); @@ -757,37 +761,41 @@ static EVP_PKEY *do_PVK_body(const unsigned char **in, p += 8; inlen = keylen - 8; q = enctmp + 8; - EVP_CIPHER_CTX_init(&cctx); - EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL); - EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen); - EVP_DecryptFinal_ex(&cctx, q + enctmplen, &enctmplen); + if (!EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL)) + goto err; + if (!EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen)) + goto err; + if (!EVP_DecryptFinal_ex(&cctx, q + enctmplen, &enctmplen)) + goto err; magic = read_ledword((const unsigned char **)&q); if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC) { q = enctmp + 8; memset(keybuf + 5, 0, 11); - EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, - NULL); + if (!EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, + NULL)) + goto err; OPENSSL_cleanse(keybuf, 20); - EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen); - EVP_DecryptFinal_ex(&cctx, q + enctmplen, - &enctmplen); + if (!EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen)) + goto err; + if (!EVP_DecryptFinal_ex(&cctx, q + enctmplen, + &enctmplen)) + goto err; magic = read_ledword((const unsigned char **)&q); if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC) { - EVP_CIPHER_CTX_cleanup(&cctx); PEMerr(PEM_F_DO_PVK_BODY, PEM_R_BAD_DECRYPT); goto err; } } else OPENSSL_cleanse(keybuf, 20); - EVP_CIPHER_CTX_cleanup(&cctx); p = enctmp; } ret = b2i_PrivateKey(&p, keylen); err: + EVP_CIPHER_CTX_cleanup(&cctx); if (enctmp && saltlen) OPENSSL_free(enctmp); return ret; @@ -841,6 +849,8 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel, { int outlen = 24, pklen; unsigned char *p, *salt = NULL; + EVP_CIPHER_CTX cctx; + EVP_CIPHER_CTX_init(&cctx); if (enclevel) outlen += PVK_SALTLEN; pklen = do_i2b(NULL, pk, 0); @@ -885,7 +895,6 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel, { char psbuf[PEM_BUFSIZE]; unsigned char keybuf[20]; - EVP_CIPHER_CTX cctx; int enctmplen, inlen; if (cb) inlen=cb(psbuf,PEM_BUFSIZE,1,u); @@ -902,16 +911,19 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel, if (enclevel == 1) memset(keybuf + 5, 0, 11); p = salt + PVK_SALTLEN + 8; - EVP_CIPHER_CTX_init(&cctx); - EVP_EncryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL); + if (!EVP_EncryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL)) + goto error; OPENSSL_cleanse(keybuf, 20); - EVP_DecryptUpdate(&cctx, p, &enctmplen, p, pklen - 8); - EVP_DecryptFinal_ex(&cctx, p + enctmplen, &enctmplen); - EVP_CIPHER_CTX_cleanup(&cctx); + if (!EVP_DecryptUpdate(&cctx, p, &enctmplen, p, pklen - 8)) + goto error; + if (!EVP_DecryptFinal_ex(&cctx, p + enctmplen, &enctmplen)) + goto error; } + EVP_CIPHER_CTX_cleanup(&cctx); return outlen; error: + EVP_CIPHER_CTX_cleanup(&cctx); return -1; } diff --git a/app/openssl/crypto/perlasm/cbc.pl b/app/openssl/crypto/perlasm/cbc.pl index 6fc25109..24561e75 100644 --- a/app/openssl/crypto/perlasm/cbc.pl +++ b/app/openssl/crypto/perlasm/cbc.pl @@ -150,7 +150,7 @@ sub cbc &set_label("PIC_point"); &blindpop("edx"); &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); - &mov($count,&DWP(0,"ecx",$count,4)) + &mov($count,&DWP(0,"ecx",$count,4)); &add($count,"edx"); &xor("ecx","ecx"); &xor("edx","edx"); diff --git a/app/openssl/crypto/perlasm/ppc-xlate.pl b/app/openssl/crypto/perlasm/ppc-xlate.pl index 4579671c..a3edd982 100755 --- a/app/openssl/crypto/perlasm/ppc-xlate.pl +++ b/app/openssl/crypto/perlasm/ppc-xlate.pl @@ -31,10 +31,9 @@ my $globl = sub { $ret .= ".type $name,\@function"; last; }; - /linux.*64/ && do { $ret .= ".globl .$name\n"; - $ret .= ".type .$name,\@function\n"; + /linux.*64/ && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function\n"; $ret .= ".section \".opd\",\"aw\"\n"; - $ret .= ".globl $name\n"; $ret .= ".align 3\n"; $ret .= "$name:\n"; $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; @@ -62,6 +61,14 @@ my $machine = sub { } ".machine $arch"; }; +my $size = sub { + if ($flavour =~ /linux.*32/) + { shift; + ".size " . join(",",@_); + } + else + { ""; } +}; my $asciz = sub { shift; my $line = join(",",@_); diff --git a/app/openssl/crypto/perlasm/x86_64-xlate.pl b/app/openssl/crypto/perlasm/x86_64-xlate.pl index e47116b7..56d9b64b 100755 --- a/app/openssl/crypto/perlasm/x86_64-xlate.pl +++ b/app/openssl/crypto/perlasm/x86_64-xlate.pl @@ -62,12 +62,8 @@ my $flavour = shift; my $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -{ my ($stddev,$stdino,@junk)=stat(STDOUT); - my ($outdev,$outino,@junk)=stat($output); - - open STDOUT,">$output" || die "can't open $output: $!" - if ($stddev!=$outdev || $stdino!=$outino); -} +open STDOUT,">$output" || die "can't open $output: $!" + if (defined($output)); my $gas=1; $gas=0 if ($output =~ /\.asm$/); my $elf=1; $elf=0 if (!$gas); @@ -116,12 +112,16 @@ my %globals; $line = substr($line,@+[0]); $line =~ s/^\s+//; undef $self->{sz}; - if ($self->{op} =~ /^(movz)b.*/) { # movz is pain... + if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain... $self->{op} = $1; - $self->{sz} = "b"; + $self->{sz} = $2; } elsif ($self->{op} =~ /call|jmp/) { $self->{sz} = ""; - } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op)/) { # SSEn + } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn + $self->{sz} = ""; + } elsif ($self->{op} =~ /^v/) { # VEX + $self->{sz} = ""; + } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { $self->{sz} = ""; } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { $self->{op} = $1; @@ -246,35 +246,39 @@ my %globals; $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{label}, new gas requires sign extension... + use integer; + $self->{label} =~ s/(?{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; + if ($gas) { - # Solaris /usr/ccs/bin/as can't handle multiplications - # in $self->{label}, new gas requires sign extension... - use integer; - $self->{label} =~ s/(?{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; - $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); if (defined($self->{index})) { - sprintf "%s%s(%%%s,%%%s,%d)",$self->{asterisk}, - $self->{label},$self->{base}, + sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk}, + $self->{label}, + $self->{base}?"%$self->{base}":"", $self->{index},$self->{scale}; } else { sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; } } else { - %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", q=>"QWORD$PTR" ); + %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", + q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR" ); $self->{label} =~ s/\./\$/g; $self->{label} =~ s/(?{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); - $sz="q" if ($self->{asterisk}); + $sz="q" if ($self->{asterisk} || opcode->mnemonic() eq "movq"); + $sz="l" if (opcode->mnemonic() eq "movd"); if (defined($self->{index})) { - sprintf "%s[%s%s*%d+%s]",$szmap{$sz}, + sprintf "%s[%s%s*%d%s]",$szmap{$sz}, $self->{label}?"$self->{label}+":"", $self->{index},$self->{scale}, - $self->{base}; + $self->{base}?"+$self->{base}":""; } elsif ($self->{base} eq "rip") { sprintf "%s[%s]",$szmap{$sz},$self->{label}; } else { @@ -506,6 +510,12 @@ my %globals; } } elsif ($dir =~ /\.(text|data)/) { $current_segment=".$1"; + } elsif ($dir =~ /\.hidden/) { + if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$line"; } + elsif ($flavour eq "mingw64") { $self->{value} = ""; } + } elsif ($dir =~ /\.comm/) { + $self->{value} = "$dir\t$prefix$line"; + $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx"); } $line = ""; return $self; @@ -555,7 +565,8 @@ my %globals; $v.=" READONLY"; $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); } elsif ($line=~/\.CRT\$/i) { - $v.=" READONLY DWORD"; + $v.=" READONLY "; + $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD"; } } $current_segment = $line; @@ -577,7 +588,7 @@ my %globals; $self->{value}="${decor}SEH_end_$current_function->{name}:"; $self->{value}.=":\n" if($masm); } - $self->{value}.="$current_function->{name}\tENDP" if($masm); + $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name}); undef $current_function; } last; @@ -613,6 +624,19 @@ my %globals; .join(",",@str) if (@str); last; }; + /\.comm/ && do { my @str=split(/,\s*/,$line); + my $v=undef; + if ($nasm) { + $v.="common $prefix@str[0] @str[1]"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = "_DATA"; + $v.="$current_segment\tSEGMENT\n"; + $v.="COMM @str[0]:DWORD:".@str[1]/4; + } + $self->{value} = $v; + last; + }; } $line = ""; } @@ -625,9 +649,133 @@ my %globals; } } +sub rex { + local *opcode=shift; + my ($dst,$src,$rex)=@_; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + push @opcode,($rex|0x40) if ($rex); +} + +# older gas and ml64 don't handle SSE>2 instructions +my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3, + "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 ); + +my $movq = sub { # elderly gas can't handle inter-register movq + my $arg = shift; + my @opcode=(0x66); + if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) { + my ($src,$dst)=($1,$2); + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,$src,$dst,0x8); + push @opcode,0x0f,0x7e; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + @opcode; + } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) { + my ($src,$dst)=($2,$1); + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,$src,$dst,0x8); + push @opcode,0x0f,0x6e; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + @opcode; + } else { + (); + } +}; + +my $pextrd = sub { + if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) { + my @opcode=(0x66); + $imm=$1; + $src=$2; + $dst=$3; + if ($dst =~ /%r([0-9]+)d/) { $dst = $1; } + elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; } + rex(\@opcode,$src,$dst); + push @opcode,0x0f,0x3a,0x16; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + push @opcode,$imm; + @opcode; + } else { + (); + } +}; + +my $pinsrd = sub { + if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + $imm=$1; + $src=$2; + $dst=$3; + if ($src =~ /%r([0-9]+)/) { $src = $1; } + elsif ($src =~ /%e/) { $src = $regrm{$src}; } + rex(\@opcode,$dst,$src); + push @opcode,0x0f,0x3a,0x22; + push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M + push @opcode,$imm; + @opcode; + } else { + (); + } +}; + +my $pshufb = sub { + if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$2,$1); + push @opcode,0x0f,0x38,0x00; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + @opcode; + } else { + (); + } +}; + +my $palignr = sub { + if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x3a,0x0f; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + push @opcode,$1; + @opcode; + } else { + (); + } +}; + +my $pclmulqdq = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x3a,0x44; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + +my $rdrand = sub { + if (shift =~ /%[er](\w+)/) { + my @opcode=(); + my $dst=$1; + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,0,$1,8); + push @opcode,0x0f,0xc7,0xf0|($dst&7); + @opcode; + } else { + (); + } +}; + if ($nasm) { print <<___; default rel +%define XMMWORD ___ } elsif ($masm) { print <<___; @@ -644,14 +792,22 @@ while($line=<>) { undef $label; undef $opcode; - undef $sz; undef @args; if ($label=label->re(\$line)) { print $label->out(); } if (directive->re(\$line)) { printf "%s",directive->out(); - } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: while (1) { + } elsif ($opcode=opcode->re(\$line)) { + my $asm = eval("\$".$opcode->mnemonic()); + undef @bytes; + + if ((ref($asm) eq 'CODE') && scalar(@bytes=&$asm($line))) { + print $gas?".byte\t":"DB\t",join(',',@bytes),"\n"; + next; + } + + ARGUMENT: while (1) { my $arg; if ($arg=register->re(\$line)) { opcode->size($arg->size()); } @@ -667,19 +823,26 @@ while($line=<>) { $line =~ s/^,\s*//; } # ARGUMENT: - $sz=opcode->size(); - if ($#args>=0) { my $insn; + my $sz=opcode->size(); + if ($gas) { $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); + @args = map($_->out($sz),@args); + printf "\t%s\t%s",$insn,join(",",@args); } else { $insn = $opcode->out(); - $insn .= $sz if (map($_->out() =~ /x?mm/,@args)); + foreach (@args) { + my $arg = $_->out(); + # $insn.=$sz compensates for movq, pinsrw, ... + if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } + if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; } + } @args = reverse(@args); undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); + printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); } - printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); } else { printf "\t%s",$opcode->out(); } diff --git a/app/openssl/crypto/perlasm/x86asm.pl b/app/openssl/crypto/perlasm/x86asm.pl index 28080caa..eb543db2 100644 --- a/app/openssl/crypto/perlasm/x86asm.pl +++ b/app/openssl/crypto/perlasm/x86asm.pl @@ -80,6 +80,57 @@ sub ::movq { &::generic("movq",@_); } } +# SSE>2 instructions +my %regrm = ( "eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3, + "esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7 ); +sub ::pextrd +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm); } + else + { &::generic("pextrd",@_); } +} + +sub ::pinsrd +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/) + { &::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm); } + else + { &::generic("pinsrd",@_); } +} + +sub ::pshufb +{ my($dst,$src)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2); } + else + { &::generic("pshufb",@_); } +} + +sub ::palignr +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm); } + else + { &::generic("palignr",@_); } +} + +sub ::pclmulqdq +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm); } + else + { &::generic("pclmulqdq",@_); } +} + +sub ::rdrand +{ my ($dst)=@_; + if ($dst =~ /(e[a-dsd][ixp])/) + { &::data_byte(0x0f,0xc7,0xf0|$regrm{$dst}); } + else + { &::generic("rdrand",@_); } +} + # label management $lbdecor="L"; # local label decoration, set by package $label="000"; @@ -167,7 +218,7 @@ sub ::asm_init $filename=$fn; $i386=$cpu; - $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=0; + $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0; if (($type eq "elf")) { $elf=1; require "x86gas.pl"; } elsif (($type eq "a\.out")) @@ -184,6 +235,8 @@ sub ::asm_init { $win32=1; require "x86masm.pl"; } elsif (($type eq "macosx")) { $aout=1; $macosx=1; require "x86gas.pl"; } + elsif (($type eq "android")) + { $elf=1; $android=1; require "x86gas.pl"; } else { print STDERR <<"EOF"; Pick one target type from diff --git a/app/openssl/crypto/perlasm/x86gas.pl b/app/openssl/crypto/perlasm/x86gas.pl index 6eab727f..682a3a31 100644 --- a/app/openssl/crypto/perlasm/x86gas.pl +++ b/app/openssl/crypto/perlasm/x86gas.pl @@ -45,9 +45,8 @@ sub ::generic undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); if ($#_==0) { &::emit($opcode); } - elsif ($opcode =~ m/^j/o && $#_==1) { &::emit($opcode,@arg); } - elsif ($opcode eq "call" && $#_==1) { &::emit($opcode,@arg); } - elsif ($opcode =~ m/^set/&& $#_==1) { &::emit($opcode,@arg); } + elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o) + { &::emit($opcode,@arg); } else { &::emit($opcode.$suffix,@arg);} 1; @@ -91,6 +90,7 @@ sub ::DWP } sub ::QWP { &::DWP(@_); } sub ::BP { &::DWP(@_); } +sub ::WP { &::DWP(@_); } sub ::BC { @_; } sub ::DWC { @_; } @@ -149,22 +149,24 @@ sub ::public_label { push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } sub ::file_end -{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4"; - if ($::elf) { push (@out,"$tmp,4\n"); } - else { push (@out,"$tmp\n"); } - } - if ($::macosx) +{ if ($::macosx) { if (%non_lazy_ptr) { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); foreach $i (keys %non_lazy_ptr) { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } } } + if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; + if ($::macosx) { push (@out,"$tmp,2\n"); } + elsif ($::elf) { push (@out,"$tmp,4\n"); } + else { push (@out,"$tmp\n"); } + } push(@out,$initseg) if ($initseg); } sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } +sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); } sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } sub ::align @@ -180,7 +182,7 @@ sub ::align sub ::picmeup { my($dst,$sym,$base,$reflabel)=@_; - if ($::pic && ($::elf || $::aout)) + if (($::pic && ($::elf || $::aout)) || $::macosx) { if (!defined($base)) { &::call(&::label("PIC_me_up")); &::set_label("PIC_me_up"); @@ -206,13 +208,17 @@ sub ::picmeup sub ::initseg { my $f=$nmdecor.shift; - if ($::elf) + if ($::android) + { $initseg.=<<___; +.section .init_array +.align 4 +.long $f +___ + } + elsif ($::elf) { $initseg.=<<___; .section .init call $f - jmp .Linitalign -.align $align -.Linitalign: ___ } elsif ($::coff) diff --git a/app/openssl/crypto/perlasm/x86masm.pl b/app/openssl/crypto/perlasm/x86masm.pl index 3d50e4a7..f937d07c 100644 --- a/app/openssl/crypto/perlasm/x86masm.pl +++ b/app/openssl/crypto/perlasm/x86masm.pl @@ -14,9 +14,11 @@ sub ::generic { my ($opcode,@arg)=@_; # fix hexadecimal constants - for (@arg) { s/0x([0-9a-f]+)/0$1h/oi; } + for (@arg) { s/(?= 0x02030000\n"); + push(@out,"safeseh ".&::LABEL($nm,$nmdecor.$nm)."\n"); + push(@out,"%endif\n"); +} + 1; diff --git a/app/openssl/crypto/pkcs12/p12_crt.c b/app/openssl/crypto/pkcs12/p12_crt.c index 96b131de..a34915d0 100644 --- a/app/openssl/crypto/pkcs12/p12_crt.c +++ b/app/openssl/crypto/pkcs12/p12_crt.c @@ -90,7 +90,14 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, /* Set defaults */ if (!nid_cert) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; + else +#endif nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC; + } if (!nid_key) nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC; if (!iter) diff --git a/app/openssl/crypto/pkcs12/p12_decr.c b/app/openssl/crypto/pkcs12/p12_decr.c index ba77dbbe..9d3557e8 100644 --- a/app/openssl/crypto/pkcs12/p12_decr.c +++ b/app/openssl/crypto/pkcs12/p12_decr.c @@ -89,7 +89,14 @@ unsigned char * PKCS12_pbe_crypt(X509_ALGOR *algor, const char *pass, goto err; } - EVP_CipherUpdate(&ctx, out, &i, in, inlen); + if (!EVP_CipherUpdate(&ctx, out, &i, in, inlen)) + { + OPENSSL_free(out); + out = NULL; + PKCS12err(PKCS12_F_PKCS12_PBE_CRYPT,ERR_R_EVP_LIB); + goto err; + } + outlen = i; if(!EVP_CipherFinal_ex(&ctx, out + i, &i)) { OPENSSL_free(out); diff --git a/app/openssl/crypto/pkcs12/p12_key.c b/app/openssl/crypto/pkcs12/p12_key.c index 424203f6..61d58502 100644 --- a/app/openssl/crypto/pkcs12/p12_key.c +++ b/app/openssl/crypto/pkcs12/p12_key.c @@ -152,14 +152,16 @@ int PKCS12_key_gen_uni(unsigned char *pass, int passlen, unsigned char *salt, for (i = 0; i < Slen; i++) *p++ = salt[i % saltlen]; for (i = 0; i < Plen; i++) *p++ = pass[i % passlen]; for (;;) { - EVP_DigestInit_ex(&ctx, md_type, NULL); - EVP_DigestUpdate(&ctx, D, v); - EVP_DigestUpdate(&ctx, I, Ilen); - EVP_DigestFinal_ex(&ctx, Ai, NULL); + if (!EVP_DigestInit_ex(&ctx, md_type, NULL) + || !EVP_DigestUpdate(&ctx, D, v) + || !EVP_DigestUpdate(&ctx, I, Ilen) + || !EVP_DigestFinal_ex(&ctx, Ai, NULL)) + goto err; for (j = 1; j < iter; j++) { - EVP_DigestInit_ex(&ctx, md_type, NULL); - EVP_DigestUpdate(&ctx, Ai, u); - EVP_DigestFinal_ex(&ctx, Ai, NULL); + if (!EVP_DigestInit_ex(&ctx, md_type, NULL) + || !EVP_DigestUpdate(&ctx, Ai, u) + || !EVP_DigestFinal_ex(&ctx, Ai, NULL)) + goto err; } memcpy (out, Ai, min (n, u)); if (u >= n) { @@ -174,24 +176,32 @@ int PKCS12_key_gen_uni(unsigned char *pass, int passlen, unsigned char *salt, out += u; for (j = 0; j < v; j++) B[j] = Ai[j % u]; /* Work out B + 1 first then can use B as tmp space */ - if (!BN_bin2bn (B, v, Bpl1)) goto err; - if (!BN_add_word (Bpl1, 1)) goto err; + if (!BN_bin2bn (B, v, Bpl1)) + goto err; + if (!BN_add_word (Bpl1, 1)) + goto err; for (j = 0; j < Ilen ; j+=v) { - if (!BN_bin2bn (I + j, v, Ij)) goto err; - if (!BN_add (Ij, Ij, Bpl1)) goto err; - BN_bn2bin (Ij, B); + if (!BN_bin2bn(I + j, v, Ij)) + goto err; + if (!BN_add(Ij, Ij, Bpl1)) + goto err; + if (!BN_bn2bin(Ij, B)) + goto err; Ijlen = BN_num_bytes (Ij); /* If more than 2^(v*8) - 1 cut off MSB */ if (Ijlen > v) { - BN_bn2bin (Ij, B); + if (!BN_bn2bin (Ij, B)) + goto err; memcpy (I + j, B + 1, v); #ifndef PKCS12_BROKEN_KEYGEN /* If less than v bytes pad with zeroes */ } else if (Ijlen < v) { memset(I + j, 0, v - Ijlen); - BN_bn2bin(Ij, I + j + v - Ijlen); + if (!BN_bn2bin(Ij, I + j + v - Ijlen)) + goto err; #endif - } else BN_bn2bin (Ij, I + j); + } else if (!BN_bn2bin (Ij, I + j)) + goto err; } } diff --git a/app/openssl/crypto/pkcs12/p12_kiss.c b/app/openssl/crypto/pkcs12/p12_kiss.c index 292cc3ed..206b1b0b 100644 --- a/app/openssl/crypto/pkcs12/p12_kiss.c +++ b/app/openssl/crypto/pkcs12/p12_kiss.c @@ -167,7 +167,7 @@ int PKCS12_parse(PKCS12 *p12, const char *pass, EVP_PKEY **pkey, X509 **cert, if (cert && *cert) X509_free(*cert); if (x) - X509_free(*cert); + X509_free(x); if (ocerts) sk_X509_pop_free(ocerts, X509_free); return 0; diff --git a/app/openssl/crypto/pkcs12/p12_mutl.c b/app/openssl/crypto/pkcs12/p12_mutl.c index 9ab740d5..96de1bd1 100644 --- a/app/openssl/crypto/pkcs12/p12_mutl.c +++ b/app/openssl/crypto/pkcs12/p12_mutl.c @@ -97,10 +97,14 @@ int PKCS12_gen_mac(PKCS12 *p12, const char *pass, int passlen, return 0; } HMAC_CTX_init(&hmac); - HMAC_Init_ex(&hmac, key, md_size, md_type, NULL); - HMAC_Update(&hmac, p12->authsafes->d.data->data, - p12->authsafes->d.data->length); - HMAC_Final(&hmac, mac, maclen); + if (!HMAC_Init_ex(&hmac, key, md_size, md_type, NULL) + || !HMAC_Update(&hmac, p12->authsafes->d.data->data, + p12->authsafes->d.data->length) + || !HMAC_Final(&hmac, mac, maclen)) + { + HMAC_CTX_cleanup(&hmac); + return 0; + } HMAC_CTX_cleanup(&hmac); return 1; } diff --git a/app/openssl/crypto/pkcs7/pk7_doit.c b/app/openssl/crypto/pkcs7/pk7_doit.c index 3bf1a367..77fda3b8 100644 --- a/app/openssl/crypto/pkcs7/pk7_doit.c +++ b/app/openssl/crypto/pkcs7/pk7_doit.c @@ -204,11 +204,11 @@ static int pkcs7_decrypt_rinfo(unsigned char **pek, int *peklen, unsigned char *ek = NULL; size_t eklen; - int ret = 0; + int ret = -1; pctx = EVP_PKEY_CTX_new(pkey, NULL); if (!pctx) - return 0; + return -1; if (EVP_PKEY_decrypt_init(pctx) <= 0) goto err; @@ -235,12 +235,19 @@ static int pkcs7_decrypt_rinfo(unsigned char **pek, int *peklen, if (EVP_PKEY_decrypt(pctx, ek, &eklen, ri->enc_key->data, ri->enc_key->length) <= 0) { + ret = 0; PKCS7err(PKCS7_F_PKCS7_DECRYPT_RINFO, ERR_R_EVP_LIB); goto err; } ret = 1; + if (*pek) + { + OPENSSL_cleanse(*pek, *peklen); + OPENSSL_free(*pek); + } + *pek = ek; *peklen = eklen; @@ -423,6 +430,8 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) STACK_OF(X509_ALGOR) *md_sk=NULL; STACK_OF(PKCS7_RECIP_INFO) *rsk=NULL; PKCS7_RECIP_INFO *ri=NULL; + unsigned char *ek = NULL, *tkey = NULL; + int eklen = 0, tkeylen = 0; i=OBJ_obj2nid(p7->type); p7->state=PKCS7_S_HEADER; @@ -500,8 +509,6 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) int max; X509_OBJECT ret; #endif - unsigned char *ek = NULL; - int eklen; if ((etmp=BIO_new(BIO_f_cipher())) == NULL) { @@ -534,29 +541,28 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) } /* If we haven't got a certificate try each ri in turn */ - if (pcert == NULL) { + /* Always attempt to decrypt all rinfo even + * after sucess as a defence against MMA timing + * attacks. + */ for (i=0; i 0) - break; + ri, pkey) < 0) + goto err; ERR_clear_error(); - ri = NULL; - } - if (ri == NULL) - { - PKCS7err(PKCS7_F_PKCS7_DATADECODE, - PKCS7_R_NO_RECIPIENT_MATCHES_KEY); - goto err; } } else { - if (pkcs7_decrypt_rinfo(&ek, &eklen, ri, pkey) <= 0) + /* Only exit on fatal errors, not decrypt failure */ + if (pkcs7_decrypt_rinfo(&ek, &eklen, ri, pkey) < 0) goto err; + ERR_clear_error(); } evp_ctx=NULL; @@ -565,6 +571,19 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) goto err; if (EVP_CIPHER_asn1_to_param(evp_ctx,enc_alg->parameter) < 0) goto err; + /* Generate random key as MMA defence */ + tkeylen = EVP_CIPHER_CTX_key_length(evp_ctx); + tkey = OPENSSL_malloc(tkeylen); + if (!tkey) + goto err; + if (EVP_CIPHER_CTX_rand_key(evp_ctx, tkey) <= 0) + goto err; + if (ek == NULL) + { + ek = tkey; + eklen = tkeylen; + tkey = NULL; + } if (eklen != EVP_CIPHER_CTX_key_length(evp_ctx)) { /* Some S/MIME clients don't use the same key @@ -573,11 +592,16 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) */ if(!EVP_CIPHER_CTX_set_key_length(evp_ctx, eklen)) { - PKCS7err(PKCS7_F_PKCS7_DATADECODE, - PKCS7_R_DECRYPTED_KEY_IS_WRONG_LENGTH); - goto err; + /* Use random key as MMA defence */ + OPENSSL_cleanse(ek, eklen); + OPENSSL_free(ek); + ek = tkey; + eklen = tkeylen; + tkey = NULL; } } + /* Clear errors so we don't leak information useful in MMA */ + ERR_clear_error(); if (EVP_CipherInit_ex(evp_ctx,NULL,NULL,ek,NULL,0) <= 0) goto err; @@ -585,6 +609,13 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) { OPENSSL_cleanse(ek,eklen); OPENSSL_free(ek); + ek = NULL; + } + if (tkey) + { + OPENSSL_cleanse(tkey,tkeylen); + OPENSSL_free(tkey); + tkey = NULL; } if (out == NULL) @@ -627,6 +658,16 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) if (0) { err: + if (ek) + { + OPENSSL_cleanse(ek,eklen); + OPENSSL_free(ek); + } + if (tkey) + { + OPENSSL_cleanse(tkey,tkeylen); + OPENSSL_free(tkey); + } if (out != NULL) BIO_free_all(out); if (btmp != NULL) BIO_free_all(btmp); if (etmp != NULL) BIO_free_all(etmp); @@ -676,7 +717,11 @@ static int do_pkcs7_signed_attrib(PKCS7_SIGNER_INFO *si, EVP_MD_CTX *mctx) } /* Add digest */ - EVP_DigestFinal_ex(mctx, md_data,&md_len); + if (!EVP_DigestFinal_ex(mctx, md_data,&md_len)) + { + PKCS7err(PKCS7_F_DO_PKCS7_SIGNED_ATTRIB, ERR_R_EVP_LIB); + return 0; + } if (!PKCS7_add1_attrib_digest(si, md_data, md_len)) { PKCS7err(PKCS7_F_DO_PKCS7_SIGNED_ATTRIB, ERR_R_MALLOC_FAILURE); @@ -784,7 +829,8 @@ int PKCS7_dataFinal(PKCS7 *p7, BIO *bio) /* We now have the EVP_MD_CTX, lets do the * signing. */ - EVP_MD_CTX_copy_ex(&ctx_tmp,mdc); + if (!EVP_MD_CTX_copy_ex(&ctx_tmp,mdc)) + goto err; sk=si->auth_attr; @@ -822,7 +868,8 @@ int PKCS7_dataFinal(PKCS7 *p7, BIO *bio) if (!PKCS7_find_digest(&mdc, bio, OBJ_obj2nid(p7->d.digest->md->algorithm))) goto err; - EVP_DigestFinal_ex(mdc,md_data,&md_len); + if (!EVP_DigestFinal_ex(mdc,md_data,&md_len)) + goto err; M_ASN1_OCTET_STRING_set(p7->d.digest->digest, md_data, md_len); } @@ -1015,7 +1062,8 @@ int PKCS7_signatureVerify(BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si, /* mdc is the digest ctx that we want, unless there are attributes, * in which case the digest is the signed attributes */ - EVP_MD_CTX_copy_ex(&mdc_tmp,mdc); + if (!EVP_MD_CTX_copy_ex(&mdc_tmp,mdc)) + goto err; sk=si->auth_attr; if ((sk != NULL) && (sk_X509_ATTRIBUTE_num(sk) != 0)) @@ -1025,7 +1073,8 @@ int PKCS7_signatureVerify(BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si, int alen; ASN1_OCTET_STRING *message_digest; - EVP_DigestFinal_ex(&mdc_tmp,md_dat,&md_len); + if (!EVP_DigestFinal_ex(&mdc_tmp,md_dat,&md_len)) + goto err; message_digest=PKCS7_digest_from_attributes(sk); if (!message_digest) { @@ -1050,7 +1099,8 @@ for (ii=0; ii 0 && BIO_method_type(tmpmem) == BIO_TYPE_CIPHER) + { + if (!BIO_get_cipher_status(tmpmem)) + ret = 0; + } BIO_free_all(bread); return ret; } else { for(;;) { i = BIO_read(tmpmem, buf, sizeof(buf)); - if(i <= 0) break; - BIO_write(data, buf, i); + if(i <= 0) + { + ret = 1; + if (BIO_method_type(tmpmem) == BIO_TYPE_CIPHER) + { + if (!BIO_get_cipher_status(tmpmem)) + ret = 0; + } + + break; + } + if (BIO_write(data, buf, i) != i) + { + ret = 0; + break; + } } BIO_free_all(tmpmem); - return 1; + return ret; } } diff --git a/app/openssl/crypto/ppccpuid.pl b/app/openssl/crypto/ppccpuid.pl index 369e1d0d..4ba736a1 100755 --- a/app/openssl/crypto/ppccpuid.pl +++ b/app/openssl/crypto/ppccpuid.pl @@ -23,36 +23,67 @@ $code=<<___; .machine "any" .text -.globl .OPENSSL_cpuid_setup +.globl .OPENSSL_ppc64_probe .align 4 -.OPENSSL_cpuid_setup: +.OPENSSL_ppc64_probe: + fcfid f1,f1 + extrdi r0,r0,32,0 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.globl .OPENSSL_altivec_probe +.align 4 +.OPENSSL_altivec_probe: + .long 0x10000484 # vor v0,v0,v0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .globl .OPENSSL_wipe_cpu .align 4 .OPENSSL_wipe_cpu: xor r0,r0,r0 + fmr f0,f31 + fmr f1,f31 + fmr f2,f31 mr r3,r1 + fmr f3,f31 xor r4,r4,r4 + fmr f4,f31 xor r5,r5,r5 + fmr f5,f31 xor r6,r6,r6 + fmr f6,f31 xor r7,r7,r7 + fmr f7,f31 xor r8,r8,r8 + fmr f8,f31 xor r9,r9,r9 + fmr f9,f31 xor r10,r10,r10 + fmr f10,f31 xor r11,r11,r11 + fmr f11,f31 xor r12,r12,r12 + fmr f12,f31 + fmr f13,f31 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .globl .OPENSSL_atomic_add .align 4 .OPENSSL_atomic_add: -Loop: lwarx r5,0,r3 +Ladd: lwarx r5,0,r3 add r0,r4,r5 stwcx. r0,0,r3 - bne- Loop + bne- Ladd $SIGNX r3,r0 blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 .globl .OPENSSL_rdtsc .align 4 @@ -60,6 +91,8 @@ Loop: lwarx r5,0,r3 mftb r3 mftbu r4 blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 .globl .OPENSSL_cleanse .align 4 @@ -72,7 +105,7 @@ Loop: lwarx r5,0,r3 Little: mtctr r4 stb r0,0(r3) addi r3,r3,1 - bdnz- \$-8 + bdnz \$-8 blr Lot: andi. r5,r3,3 beq Laligned @@ -85,10 +118,13 @@ Laligned: mtctr r5 stw r0,0(r3) addi r3,r3,4 - bdnz- \$-8 + bdnz \$-8 andi. r4,r4,3 bne Little blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/rand/md_rand.c b/app/openssl/crypto/rand/md_rand.c index b2f04ff1..aee1c30b 100644 --- a/app/openssl/crypto/rand/md_rand.c +++ b/app/openssl/crypto/rand/md_rand.c @@ -109,6 +109,8 @@ * */ +#define OPENSSL_FIPSEVP + #ifdef MD_RAND_DEBUG # ifndef NDEBUG # define NDEBUG @@ -121,10 +123,10 @@ #include "e_os.h" +#include #include #include "rand_lcl.h" -#include #include #ifdef BN_DEBUG @@ -157,13 +159,14 @@ const char RAND_version[]="RAND" OPENSSL_VERSION_PTEXT; static void ssleay_rand_cleanup(void); static void ssleay_rand_seed(const void *buf, int num); static void ssleay_rand_add(const void *buf, int num, double add_entropy); -static int ssleay_rand_bytes(unsigned char *buf, int num); +static int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo); +static int ssleay_rand_nopseudo_bytes(unsigned char *buf, int num); static int ssleay_rand_pseudo_bytes(unsigned char *buf, int num); static int ssleay_rand_status(void); RAND_METHOD rand_ssleay_meth={ ssleay_rand_seed, - ssleay_rand_bytes, + ssleay_rand_nopseudo_bytes, ssleay_rand_cleanup, ssleay_rand_add, ssleay_rand_pseudo_bytes, @@ -195,6 +198,9 @@ static void ssleay_rand_add(const void *buf, int num, double add) EVP_MD_CTX m; int do_not_lock; + if (!num) + return; + /* * (Based on the rand(3) manpage) * @@ -328,7 +334,7 @@ static void ssleay_rand_seed(const void *buf, int num) ssleay_rand_add(buf, num, (double)num); } -static int ssleay_rand_bytes(unsigned char *buf, int num) +static int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo) { static volatile int stirred_pool = 0; int i,j,k,st_num,st_idx; @@ -377,8 +383,11 @@ static int ssleay_rand_bytes(unsigned char *buf, int num) * are fed into the hash function and the results are kept in the * global 'md'. */ - - CRYPTO_w_lock(CRYPTO_LOCK_RAND); +#ifdef OPENSSL_FIPS + /* NB: in FIPS mode we are already under a lock */ + if (!FIPS_mode()) +#endif + CRYPTO_w_lock(CRYPTO_LOCK_RAND); /* prevent ssleay_rand_bytes() from trying to obtain the lock again */ CRYPTO_w_lock(CRYPTO_LOCK_RAND2); @@ -457,7 +466,10 @@ static int ssleay_rand_bytes(unsigned char *buf, int num) /* before unlocking, we must clear 'crypto_lock_rand' */ crypto_lock_rand = 0; - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); +#ifdef OPENSSL_FIPS + if (!FIPS_mode()) +#endif + CRYPTO_w_unlock(CRYPTO_LOCK_RAND); while (num > 0) { @@ -509,15 +521,23 @@ static int ssleay_rand_bytes(unsigned char *buf, int num) MD_Init(&m); MD_Update(&m,(unsigned char *)&(md_c[0]),sizeof(md_c)); MD_Update(&m,local_md,MD_DIGEST_LENGTH); - CRYPTO_w_lock(CRYPTO_LOCK_RAND); +#ifdef OPENSSL_FIPS + if (!FIPS_mode()) +#endif + CRYPTO_w_lock(CRYPTO_LOCK_RAND); MD_Update(&m,md,MD_DIGEST_LENGTH); MD_Final(&m,md); - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); +#ifdef OPENSSL_FIPS + if (!FIPS_mode()) +#endif + CRYPTO_w_unlock(CRYPTO_LOCK_RAND); EVP_MD_CTX_cleanup(&m); if (ok) return(1); - else + else if (pseudo) + return 0; + else { RANDerr(RAND_F_SSLEAY_RAND_BYTES,RAND_R_PRNG_NOT_SEEDED); ERR_add_error_data(1, "You need to read the OpenSSL FAQ, " @@ -526,22 +546,16 @@ static int ssleay_rand_bytes(unsigned char *buf, int num) } } +static int ssleay_rand_nopseudo_bytes(unsigned char *buf, int num) + { + return ssleay_rand_bytes(buf, num, 0); + } + /* pseudo-random bytes that are guaranteed to be unique but not unpredictable */ static int ssleay_rand_pseudo_bytes(unsigned char *buf, int num) { - int ret; - unsigned long err; - - ret = RAND_bytes(buf, num); - if (ret == 0) - { - err = ERR_peek_error(); - if (ERR_GET_LIB(err) == ERR_LIB_RAND && - ERR_GET_REASON(err) == RAND_R_PRNG_NOT_SEEDED) - ERR_clear_error(); - } - return (ret); + return ssleay_rand_bytes(buf, num, 1); } static int ssleay_rand_status(void) diff --git a/app/openssl/crypto/rand/rand.h b/app/openssl/crypto/rand/rand.h index ac6c0217..bb5520e8 100644 --- a/app/openssl/crypto/rand/rand.h +++ b/app/openssl/crypto/rand/rand.h @@ -119,6 +119,11 @@ int RAND_event(UINT, WPARAM, LPARAM); #endif +#ifdef OPENSSL_FIPS +void RAND_set_fips_drbg_type(int type, int flags); +int RAND_init_fips(void); +#endif + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -129,9 +134,14 @@ void ERR_load_RAND_strings(void); /* Function codes. */ #define RAND_F_RAND_GET_RAND_METHOD 101 +#define RAND_F_RAND_INIT_FIPS 102 #define RAND_F_SSLEAY_RAND_BYTES 100 /* Reason codes. */ +#define RAND_R_DUAL_EC_DRBG_DISABLED 104 +#define RAND_R_ERROR_INITIALISING_DRBG 102 +#define RAND_R_ERROR_INSTANTIATING_DRBG 103 +#define RAND_R_NO_FIPS_RANDOM_METHOD_SET 101 #define RAND_R_PRNG_NOT_SEEDED 100 #ifdef __cplusplus diff --git a/app/openssl/crypto/rand/rand_err.c b/app/openssl/crypto/rand/rand_err.c index 03cda4dd..c4c80fc8 100644 --- a/app/openssl/crypto/rand/rand_err.c +++ b/app/openssl/crypto/rand/rand_err.c @@ -1,6 +1,6 @@ /* crypto/rand/rand_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -71,12 +71,17 @@ static ERR_STRING_DATA RAND_str_functs[]= { {ERR_FUNC(RAND_F_RAND_GET_RAND_METHOD), "RAND_get_rand_method"}, +{ERR_FUNC(RAND_F_RAND_INIT_FIPS), "RAND_init_fips"}, {ERR_FUNC(RAND_F_SSLEAY_RAND_BYTES), "SSLEAY_RAND_BYTES"}, {0,NULL} }; static ERR_STRING_DATA RAND_str_reasons[]= { +{ERR_REASON(RAND_R_DUAL_EC_DRBG_DISABLED),"dual ec drbg disabled"}, +{ERR_REASON(RAND_R_ERROR_INITIALISING_DRBG),"error initialising drbg"}, +{ERR_REASON(RAND_R_ERROR_INSTANTIATING_DRBG),"error instantiating drbg"}, +{ERR_REASON(RAND_R_NO_FIPS_RANDOM_METHOD_SET),"no fips random method set"}, {ERR_REASON(RAND_R_PRNG_NOT_SEEDED) ,"PRNG not seeded"}, {0,NULL} }; diff --git a/app/openssl/crypto/rand/rand_lib.c b/app/openssl/crypto/rand/rand_lib.c index 513e3389..5ac0e14c 100644 --- a/app/openssl/crypto/rand/rand_lib.c +++ b/app/openssl/crypto/rand/rand_lib.c @@ -60,10 +60,16 @@ #include #include "cryptlib.h" #include + #ifndef OPENSSL_NO_ENGINE #include #endif +#ifdef OPENSSL_FIPS +#include +#include +#endif + #ifndef OPENSSL_NO_ENGINE /* non-NULL if default_RAND_meth is ENGINE-provided */ static ENGINE *funct_ref =NULL; @@ -174,3 +180,127 @@ int RAND_status(void) return meth->status(); return 0; } + +#ifdef OPENSSL_FIPS + +/* FIPS DRBG initialisation code. This sets up the DRBG for use by the + * rest of OpenSSL. + */ + +/* Entropy gatherer: use standard OpenSSL PRNG to seed (this will gather + * entropy internally through RAND_poll(). + */ + +static size_t drbg_get_entropy(DRBG_CTX *ctx, unsigned char **pout, + int entropy, size_t min_len, size_t max_len) + { + /* Round up request to multiple of block size */ + min_len = ((min_len + 19) / 20) * 20; + *pout = OPENSSL_malloc(min_len); + if (!*pout) + return 0; + if (RAND_SSLeay()->bytes(*pout, min_len) <= 0) + { + OPENSSL_free(*pout); + *pout = NULL; + return 0; + } + return min_len; + } + +static void drbg_free_entropy(DRBG_CTX *ctx, unsigned char *out, size_t olen) + { + if (out) + { + OPENSSL_cleanse(out, olen); + OPENSSL_free(out); + } + } + +/* Set "additional input" when generating random data. This uses the + * current PID, a time value and a counter. + */ + +static size_t drbg_get_adin(DRBG_CTX *ctx, unsigned char **pout) + { + /* Use of static variables is OK as this happens under a lock */ + static unsigned char buf[16]; + static unsigned long counter; + FIPS_get_timevec(buf, &counter); + *pout = buf; + return sizeof(buf); + } + +/* RAND_add() and RAND_seed() pass through to OpenSSL PRNG so it is + * correctly seeded by RAND_poll(). + */ + +static int drbg_rand_add(DRBG_CTX *ctx, const void *in, int inlen, + double entropy) + { + RAND_SSLeay()->add(in, inlen, entropy); + return 1; + } + +static int drbg_rand_seed(DRBG_CTX *ctx, const void *in, int inlen) + { + RAND_SSLeay()->seed(in, inlen); + return 1; + } + +#ifndef OPENSSL_DRBG_DEFAULT_TYPE +#define OPENSSL_DRBG_DEFAULT_TYPE NID_aes_256_ctr +#endif +#ifndef OPENSSL_DRBG_DEFAULT_FLAGS +#define OPENSSL_DRBG_DEFAULT_FLAGS DRBG_FLAG_CTR_USE_DF +#endif + +static int fips_drbg_type = OPENSSL_DRBG_DEFAULT_TYPE; +static int fips_drbg_flags = OPENSSL_DRBG_DEFAULT_FLAGS; + +void RAND_set_fips_drbg_type(int type, int flags) + { + fips_drbg_type = type; + fips_drbg_flags = flags; + } + +int RAND_init_fips(void) + { + DRBG_CTX *dctx; + size_t plen; + unsigned char pers[32], *p; +#ifndef OPENSSL_ALLOW_DUAL_EC_DRBG + if (fips_drbg_type >> 16) + { + RANDerr(RAND_F_RAND_INIT_FIPS, RAND_R_DUAL_EC_DRBG_DISABLED); + return 0; + } +#endif + + dctx = FIPS_get_default_drbg(); + if (FIPS_drbg_init(dctx, fips_drbg_type, fips_drbg_flags) <= 0) + { + RANDerr(RAND_F_RAND_INIT_FIPS, RAND_R_ERROR_INITIALISING_DRBG); + return 0; + } + + FIPS_drbg_set_callbacks(dctx, + drbg_get_entropy, drbg_free_entropy, 20, + drbg_get_entropy, drbg_free_entropy); + FIPS_drbg_set_rand_callbacks(dctx, drbg_get_adin, 0, + drbg_rand_seed, drbg_rand_add); + /* Personalisation string: a string followed by date time vector */ + strcpy((char *)pers, "OpenSSL DRBG2.0"); + plen = drbg_get_adin(dctx, &p); + memcpy(pers + 16, p, plen); + + if (FIPS_drbg_instantiate(dctx, pers, sizeof(pers)) <= 0) + { + RANDerr(RAND_F_RAND_INIT_FIPS, RAND_R_ERROR_INSTANTIATING_DRBG); + return 0; + } + FIPS_rand_set_method(FIPS_drbg_method()); + return 1; + } + +#endif diff --git a/app/openssl/crypto/rand/rand_unix.c b/app/openssl/crypto/rand/rand_unix.c index e9ead3a5..e3a65571 100644 --- a/app/openssl/crypto/rand/rand_unix.c +++ b/app/openssl/crypto/rand/rand_unix.c @@ -133,47 +133,87 @@ # define FD_SETSIZE (8*sizeof(fd_set)) #endif -#ifdef __VOS__ +#if defined(OPENSSL_SYS_VOS) + +/* The following algorithm repeatedly samples the real-time clock + (RTC) to generate a sequence of unpredictable data. The algorithm + relies upon the uneven execution speed of the code (due to factors + such as cache misses, interrupts, bus activity, and scheduling) and + upon the rather large relative difference between the speed of the + clock and the rate at which it can be read. + + If this code is ported to an environment where execution speed is + more constant or where the RTC ticks at a much slower rate, or the + clock can be read with fewer instructions, it is likely that the + results would be far more predictable. + + As a precaution, we generate 4 times the minimum required amount of + seed data. */ + int RAND_poll(void) { - unsigned char buf[ENTROPY_NEEDED]; + short int code; + gid_t curr_gid; pid_t curr_pid; uid_t curr_uid; - static int first=1; - int i; - long rnd = 0; + int i, k; struct timespec ts; - unsigned seed; - -/* The VOS random() function starts from a static seed so its - initial value is predictable. If random() returns the - initial value, reseed it with dynamic data. The VOS - real-time clock has a granularity of 1 nsec so it should be - reasonably difficult to predict its exact value. Do not - gratuitously reseed the PRNG because other code in this - process or thread may be using it. */ - - if (first) { - first = 0; - rnd = random (); - if (rnd == 1804289383) { - clock_gettime (CLOCK_REALTIME, &ts); - curr_pid = getpid(); - curr_uid = getuid(); - seed = ts.tv_sec ^ ts.tv_nsec ^ curr_pid ^ curr_uid; - srandom (seed); - } - } + unsigned char v; - for (i = 0; i < sizeof(buf); i++) { - if (i % 4 == 0) - rnd = random(); - buf[i] = rnd; - rnd >>= 8; - } - RAND_add(buf, sizeof(buf), ENTROPY_NEEDED); - memset(buf, 0, sizeof(buf)); +#ifdef OPENSSL_SYS_VOS_HPPA + long duration; + extern void s$sleep (long *_duration, short int *_code); +#else +#ifdef OPENSSL_SYS_VOS_IA32 + long long duration; + extern void s$sleep2 (long long *_duration, short int *_code); +#else +#error "Unsupported Platform." +#endif /* OPENSSL_SYS_VOS_IA32 */ +#endif /* OPENSSL_SYS_VOS_HPPA */ + + /* Seed with the gid, pid, and uid, to ensure *some* + variation between different processes. */ + + curr_gid = getgid(); + RAND_add (&curr_gid, sizeof curr_gid, 1); + curr_gid = 0; + + curr_pid = getpid(); + RAND_add (&curr_pid, sizeof curr_pid, 1); + curr_pid = 0; + + curr_uid = getuid(); + RAND_add (&curr_uid, sizeof curr_uid, 1); + curr_uid = 0; + for (i=0; i<(ENTROPY_NEEDED*4); i++) + { + /* burn some cpu; hope for interrupts, cache + collisions, bus interference, etc. */ + for (k=0; k<99; k++) + ts.tv_nsec = random (); + +#ifdef OPENSSL_SYS_VOS_HPPA + /* sleep for 1/1024 of a second (976 us). */ + duration = 1; + s$sleep (&duration, &code); +#else +#ifdef OPENSSL_SYS_VOS_IA32 + /* sleep for 1/65536 of a second (15 us). */ + duration = 1; + s$sleep2 (&duration, &code); +#endif /* OPENSSL_SYS_VOS_IA32 */ +#endif /* OPENSSL_SYS_VOS_HPPA */ + + /* get wall clock time. */ + clock_gettime (CLOCK_REALTIME, &ts); + + /* take 8 bits */ + v = (unsigned char) (ts.tv_nsec % 256); + RAND_add (&v, sizeof v, 1); + v = 0; + } return 1; } #elif defined __OpenBSD__ diff --git a/app/openssl/crypto/rand/rand_win.c b/app/openssl/crypto/rand/rand_win.c index 5d134e18..34ffcd23 100644 --- a/app/openssl/crypto/rand/rand_win.c +++ b/app/openssl/crypto/rand/rand_win.c @@ -750,7 +750,7 @@ static void readscreen(void) int y; /* y-coordinate of screen lines to grab */ int n = 16; /* number of screen lines to grab at a time */ - if (GetVersion() < 0x80000000 && OPENSSL_isservice()>0) + if (check_winnt() && OPENSSL_isservice()>0) return; /* Create a screen DC and a memory DC compatible to screen DC */ diff --git a/app/openssl/crypto/rand/randfile.c b/app/openssl/crypto/rand/randfile.c index bc7d9c58..7f142807 100644 --- a/app/openssl/crypto/rand/randfile.c +++ b/app/openssl/crypto/rand/randfile.c @@ -57,7 +57,9 @@ */ /* We need to define this to get macros like S_IFBLK and S_IFCHR */ +#if !defined(OPENSSL_SYS_VXWORKS) #define _XOPEN_SOURCE 500 +#endif #include #include @@ -137,7 +139,7 @@ int RAND_load_file(const char *file, long bytes) in=fopen(file,"rb"); #endif if (in == NULL) goto err; -#if defined(S_IFBLK) && defined(S_IFCHR) && !defined(OPNESSL_NO_POSIX_IO) +#if defined(S_IFBLK) && defined(S_IFCHR) && !defined(OPENSSL_NO_POSIX_IO) if (sb.st_mode & (S_IFBLK | S_IFCHR)) { /* this file is a device. we don't want read an infinite number * of bytes from a random device, nor do we want to use buffered diff --git a/app/openssl/crypto/rc2/rc2.h b/app/openssl/crypto/rc2/rc2.h index 34c83623..e542ec94 100644 --- a/app/openssl/crypto/rc2/rc2.h +++ b/app/openssl/crypto/rc2/rc2.h @@ -79,7 +79,9 @@ typedef struct rc2_key_st RC2_INT data[64]; } RC2_KEY; - +#ifdef OPENSSL_FIPS +void private_RC2_set_key(RC2_KEY *key, int len, const unsigned char *data,int bits); +#endif void RC2_set_key(RC2_KEY *key, int len, const unsigned char *data,int bits); void RC2_ecb_encrypt(const unsigned char *in,unsigned char *out,RC2_KEY *key, int enc); diff --git a/app/openssl/crypto/rc2/rc2_skey.c b/app/openssl/crypto/rc2/rc2_skey.c index 0150b0e0..6668ac01 100644 --- a/app/openssl/crypto/rc2/rc2_skey.c +++ b/app/openssl/crypto/rc2/rc2_skey.c @@ -56,6 +56,7 @@ * [including the GNU Public Licence.] */ +#include #include #include "rc2_locl.h" @@ -95,6 +96,13 @@ static const unsigned char key_table[256]={ * the same as specifying 1024 for the 'bits' parameter. Bsafe uses * a version where the bits parameter is the same as len*8 */ void RC2_set_key(RC2_KEY *key, int len, const unsigned char *data, int bits) +#ifdef OPENSSL_FIPS + { + fips_cipher_abort(RC2); + private_RC2_set_key(key, len, data, bits); + } +void private_RC2_set_key(RC2_KEY *key, int len, const unsigned char *data, int bits) +#endif { int i,j; unsigned char *k; diff --git a/app/openssl/crypto/rc4/asm/rc4-586.pl b/app/openssl/crypto/rc4/asm/rc4-586.pl index 38a44a70..5c9ac6ad 100644 --- a/app/openssl/crypto/rc4/asm/rc4-586.pl +++ b/app/openssl/crypto/rc4/asm/rc4-586.pl @@ -28,6 +28,34 @@ # # +# May 2011 +# +# Optimize for Core2 and Westmere [and incidentally Opteron]. Current +# performance in cycles per processed byte (less is better) and +# improvement relative to previous version of this module is: +# +# Pentium 10.2 # original numbers +# Pentium III 7.8(*) +# Intel P4 7.5 +# +# Opteron 6.1/+20% # new MMX numbers +# Core2 5.3/+67%(**) +# Westmere 5.1/+94%(**) +# Sandy Bridge 5.0/+8% +# Atom 12.6/+6% +# +# (*) PIII can actually deliver 6.6 cycles per byte with MMX code, +# but this specific code performs poorly on Core2. And vice +# versa, below MMX/SSE code delivering 5.8/7.1 on Core2 performs +# poorly on PIII, at 8.0/14.5:-( As PIII is not a "hot" CPU +# [anymore], I chose to discard PIII-specific code path and opt +# for original IALU-only code, which is why MMX/SSE code path +# is guarded by SSE2 bit (see below), not MMX/SSE. +# (**) Performance vs. block size on Core2 and Westmere had a maximum +# at ... 64 bytes block size. And it was quite a maximum, 40-60% +# in comparison to largest 8KB block size. Above improvement +# coefficients are for the largest block size. + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; @@ -62,6 +90,68 @@ sub RC4_loop { &$func ($out,&DWP(0,$dat,$ty,4)); } +if ($alt=0) { + # >20% faster on Atom and Sandy Bridge[!], 8% faster on Opteron, + # but ~40% slower on Core2 and Westmere... Attempt to add movz + # brings down Opteron by 25%, Atom and Sandy Bridge by 15%, yet + # on Core2 with movz it's almost 20% slower than below alternative + # code... Yes, it's a total mess... + my @XX=($xx,$out); + $RC4_loop_mmx = sub { # SSE actually... + my $i=shift; + my $j=$i<=0?0:$i>>1; + my $mm=$i<=0?"mm0":"mm".($i&1); + + &add (&LB($yy),&LB($tx)); + &lea (@XX[1],&DWP(1,@XX[0])); + &pxor ("mm2","mm0") if ($i==0); + &psllq ("mm1",8) if ($i==0); + &and (@XX[1],0xff); + &pxor ("mm0","mm0") if ($i<=0); + &mov ($ty,&DWP(0,$dat,$yy,4)); + &mov (&DWP(0,$dat,$yy,4),$tx); + &pxor ("mm1","mm2") if ($i==0); + &mov (&DWP(0,$dat,$XX[0],4),$ty); + &add (&LB($ty),&LB($tx)); + &movd (@XX[0],"mm7") if ($i==0); + &mov ($tx,&DWP(0,$dat,@XX[1],4)); + &pxor ("mm1","mm1") if ($i==1); + &movq ("mm2",&QWP(0,$inp)) if ($i==1); + &movq (&QWP(-8,(@XX[0],$inp)),"mm1") if ($i==0); + &pinsrw ($mm,&DWP(0,$dat,$ty,4),$j); + + push (@XX,shift(@XX)) if ($i>=0); + } +} else { + # Using pinsrw here improves performane on Intel CPUs by 2-3%, but + # brings down AMD by 7%... + $RC4_loop_mmx = sub { + my $i=shift; + + &add (&LB($yy),&LB($tx)); + &psllq ("mm1",8*(($i-1)&7)) if (abs($i)!=1); + &mov ($ty,&DWP(0,$dat,$yy,4)); + &mov (&DWP(0,$dat,$yy,4),$tx); + &mov (&DWP(0,$dat,$xx,4),$ty); + &inc ($xx); + &add ($ty,$tx); + &movz ($xx,&LB($xx)); # (*) + &movz ($ty,&LB($ty)); # (*) + &pxor ("mm2",$i==1?"mm0":"mm1") if ($i>=0); + &movq ("mm0",&QWP(0,$inp)) if ($i<=0); + &movq (&QWP(-8,($out,$inp)),"mm2") if ($i==0); + &mov ($tx,&DWP(0,$dat,$xx,4)); + &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); + + # (*) This is the key to Core2 and Westmere performance. + # Whithout movz out-of-order execution logic confuses + # itself and fails to reorder loads and stores. Problem + # appears to be fixed in Sandy Bridge... + } +} + +&external_label("OPENSSL_ia32cap_P"); + # void RC4(RC4_KEY *key,size_t len,const unsigned char *inp,unsigned char *out); &function_begin("RC4"); &mov ($dat,&wparam(0)); # load key schedule pointer @@ -94,11 +184,56 @@ sub RC4_loop { &and ($ty,-4); # how many 4-byte chunks? &jz (&label("loop1")); + &test ($ty,-8); + &mov (&wparam(3),$out); # $out as accumulator in these loops + &jz (&label("go4loop4")); + + &picmeup($out,"OPENSSL_ia32cap_P"); + &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] + &jnc (&label("go4loop4")); + + &mov ($out,&wparam(3)) if (!$alt); + &movd ("mm7",&wparam(3)) if ($alt); + &and ($ty,-8); + &lea ($ty,&DWP(-8,$inp,$ty)); + &mov (&DWP(-4,$dat),$ty); # save input+(len/8)*8-8 + + &$RC4_loop_mmx(-1); + &jmp(&label("loop_mmx_enter")); + + &set_label("loop_mmx",16); + &$RC4_loop_mmx(0); + &set_label("loop_mmx_enter"); + for ($i=1;$i<8;$i++) { &$RC4_loop_mmx($i); } + &mov ($ty,$yy); + &xor ($yy,$yy); # this is second key to Core2 + &mov (&LB($yy),&LB($ty)); # and Westmere performance... + &cmp ($inp,&DWP(-4,$dat)); + &lea ($inp,&DWP(8,$inp)); + &jb (&label("loop_mmx")); + + if ($alt) { + &movd ($out,"mm7"); + &pxor ("mm2","mm0"); + &psllq ("mm1",8); + &pxor ("mm1","mm2"); + &movq (&QWP(-8,$out,$inp),"mm1"); + } else { + &psllq ("mm1",56); + &pxor ("mm2","mm1"); + &movq (&QWP(-8,$out,$inp),"mm2"); + } + &emms (); + + &cmp ($inp,&wparam(1)); # compare to input+len + &je (&label("done")); + &jmp (&label("loop1")); + +&set_label("go4loop4",16); &lea ($ty,&DWP(-4,$inp,$ty)); &mov (&wparam(2),$ty); # save input+(len/4)*4-4 - &mov (&wparam(3),$out); # $out as accumulator in this loop - &set_label("loop4",16); + &set_label("loop4"); for ($i=0;$i<4;$i++) { RC4_loop($i); } &ror ($out,8); &xor ($out,&DWP(0,$inp)); @@ -151,7 +286,7 @@ sub RC4_loop { &set_label("done"); &dec (&LB($xx)); - &mov (&BP(-4,$dat),&LB($yy)); # save key->y + &mov (&DWP(-4,$dat),$yy); # save key->y &mov (&BP(-8,$dat),&LB($xx)); # save key->x &set_label("abort"); &function_end("RC4"); @@ -164,10 +299,8 @@ $idi="ebp"; $ido="ecx"; $idx="edx"; -&external_label("OPENSSL_ia32cap_P"); - # void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); -&function_begin("RC4_set_key"); +&function_begin("private_RC4_set_key"); &mov ($out,&wparam(0)); # load key &mov ($idi,&wparam(1)); # load len &mov ($inp,&wparam(2)); # load data @@ -245,7 +378,7 @@ $idx="edx"; &xor ("eax","eax"); &mov (&DWP(-8,$out),"eax"); # key->x=0; &mov (&DWP(-4,$out),"eax"); # key->y=0; -&function_end("RC4_set_key"); +&function_end("private_RC4_set_key"); # const char *RC4_options(void); &function_begin_B("RC4_options"); @@ -254,14 +387,21 @@ $idx="edx"; &blindpop("eax"); &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); &picmeup("edx","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"edx"),20); - &jnc (&label("skip")); - &add ("eax",12); - &set_label("skip"); + &mov ("edx",&DWP(0,"edx")); + &bt ("edx",20); + &jc (&label("1xchar")); + &bt ("edx",26); + &jnc (&label("ret")); + &add ("eax",25); + &ret (); +&set_label("1xchar"); + &add ("eax",12); +&set_label("ret"); &ret (); &set_label("opts",64); &asciz ("rc4(4x,int)"); &asciz ("rc4(1x,char)"); +&asciz ("rc4(8x,mmx)"); &asciz ("RC4 for x86, CRYPTOGAMS by "); &align (64); &function_end_B("RC4_options"); diff --git a/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.S b/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.S new file mode 100644 index 00000000..aab3c6db --- /dev/null +++ b/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.S @@ -0,0 +1,1259 @@ +.text +.align 16 + +.globl rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: + cmpq $0,%r9 + je .Labort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +.Lbody: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp .Loop + +.align 16 +.Loop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb .Loop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +.Lepilogue: +.Labort: + .byte 0xf3,0xc3 +.size rc4_md5_enc,.-rc4_md5_enc diff --git a/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl b/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl new file mode 100644 index 00000000..272fa91e --- /dev/null +++ b/app/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl @@ -0,0 +1,632 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# June 2011 +# +# This is RC4+MD5 "stitch" implementation. The idea, as spelled in +# http://download.intel.com/design/intarch/papers/323686.pdf, is that +# since both algorithms exhibit instruction-level parallelism, ILP, +# below theoretical maximum, interleaving them would allow to utilize +# processor resources better and achieve better performance. RC4 +# instruction sequence is virtually identical to rc4-x86_64.pl, which +# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin +# and Jim Guilford of Intel. MD5 is fresh implementation aiming to +# minimize register usage, which was used as "main thread" with RC4 +# weaved into it, one RC4 round per one MD5 round. In addition to the +# stiched subroutine the script can generate standalone replacement +# md5_block_asm_data_order and RC4. Below are performance numbers in +# cycles per processed byte, less is better, for these the standalone +# subroutines, sum of them, and stitched one: +# +# RC4 MD5 RC4+MD5 stitch gain +# Opteron 6.5(*) 5.4 11.9 7.0 +70%(*) +# Core2 6.5 5.8 12.3 7.7 +60% +# Westmere 4.3 5.2 9.5 7.0 +36% +# Sandy Bridge 4.2 5.5 9.7 6.8 +43% +# Atom 9.3 6.5 15.8 11.1 +42% +# +# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement +# is +53%... + +my ($rc4,$md5)=(1,1); # what to generate? +my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(), + # but its result is discarded. Idea here is + # to be able to use 'openssl speed rc4' for + # benchmarking the stitched subroutine... + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs); + +if ($rc4 && !$md5) { + ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx"); + $func="RC4"; $nargs=4; +} elsif ($md5 && !$rc4) { + ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx"); + $func="md5_block_asm_data_order"; $nargs=3; +} else { + ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); + $func="rc4_md5_enc"; $nargs=6; + # void rc4_md5_enc( + # RC4_KEY *key, # + # const void *in0, # RC4 input + # void *out, # RC4 output + # MD5_CTX *ctx, # + # const void *inp, # MD5 input + # size_t len); # number of 64-byte blocks +} + +my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee, + 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501, + 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be, + 0x6b901122,0xfd987193,0xa679438e,0x49b40821, + + 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa, + 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8, + 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed, + 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a, + + 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c, + 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70, + 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05, + 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665, + + 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039, + 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1, + 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1, + 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391 ); + +my @V=("%r8d","%r9d","%r10d","%r11d"); # MD5 registers +my $tmp="%r12d"; + +my @XX=("%rbp","%rsi"); # RC4 registers +my @TX=("%rax","%rbx"); +my $YY="%rcx"; +my $TY="%rdx"; + +my $MOD=32; # 16, 32 or 64 + +$code.=<<___; +.text +.align 16 + +.globl $func +.type $func,\@function,$nargs +$func: + cmp \$0,$len + je .Labort + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + sub \$40,%rsp +.Lbody: +___ +if ($rc4) { +$code.=<<___; +$D#md5# mov $ctx,%r11 # reassign arguments + mov $len,%r12 + mov $in0,%r13 + mov $out,%r14 +$D#md5# mov $inp,%r15 +___ + $ctx="%r11" if ($md5); # reassign arguments + $len="%r12"; + $in0="%r13"; + $out="%r14"; + $inp="%r15" if ($md5); + $inp=$in0 if (!$md5); +$code.=<<___; + xor $XX[0],$XX[0] + xor $YY,$YY + + lea 8($dat),$dat + mov -8($dat),$XX[0]#b + mov -4($dat),$YY#b + + inc $XX[0]#b + sub $in0,$out + movl ($dat,$XX[0],4),$TX[0]#d +___ +$code.=<<___ if (!$md5); + xor $TX[1],$TX[1] + test \$-128,$len + jz .Loop1 + sub $XX[0],$TX[1] + and \$`$MOD-1`,$TX[1] + jz .Loop${MOD}_is_hot + sub $TX[1],$len +.Loop${MOD}_warmup: + add $TX[0]#b,$YY#b + movl ($dat,$YY,4),$TY#d + movl $TX[0]#d,($dat,$YY,4) + movl $TY#d,($dat,$XX[0],4) + add $TY#b,$TX[0]#b + inc $XX[0]#b + movl ($dat,$TX[0],4),$TY#d + movl ($dat,$XX[0],4),$TX[0]#d + xorb ($in0),$TY#b + movb $TY#b,($out,$in0) + lea 1($in0),$in0 + dec $TX[1] + jnz .Loop${MOD}_warmup + + mov $YY,$TX[1] + xor $YY,$YY + mov $TX[1]#b,$YY#b + +.Loop${MOD}_is_hot: + mov $len,32(%rsp) # save original $len + shr \$6,$len # number of 64-byte blocks +___ + if ($D && !$md5) { # stitch in dummy MD5 + $md5=1; + $ctx="%r11"; + $inp="%r15"; + $code.=<<___; + mov %rsp,$ctx + mov $in0,$inp +___ + } +} +$code.=<<___; +#rc4# add $TX[0]#b,$YY#b +#rc4# lea ($dat,$XX[0],4),$XX[1] + shl \$6,$len + add $inp,$len # pointer to the end of input + mov $len,16(%rsp) + +#md5# mov $ctx,24(%rsp) # save pointer to MD5_CTX +#md5# mov 0*4($ctx),$V[0] # load current hash value from MD5_CTX +#md5# mov 1*4($ctx),$V[1] +#md5# mov 2*4($ctx),$V[2] +#md5# mov 3*4($ctx),$V[3] + jmp .Loop + +.align 16 +.Loop: +#md5# mov $V[0],0*4(%rsp) # put aside current hash value +#md5# mov $V[1],1*4(%rsp) +#md5# mov $V[2],2*4(%rsp) +#md5# mov $V[3],$tmp # forward reference +#md5# mov $V[3],3*4(%rsp) +___ + +sub R0 { + my ($i,$a,$b,$c,$d)=@_; + my @rot0=(7,12,17,22); + my $j=$i%16; + my $k=$i%$MOD; + my $xmm="%xmm".($j&1); + $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15); + $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); + $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); + $code.=<<___; +#rc4# movl ($dat,$YY,4),$TY#d +#md5# xor $c,$tmp +#rc4# movl $TX[0]#d,($dat,$YY,4) +#md5# and $b,$tmp +#md5# add 4*`$j`($inp),$a +#rc4# add $TY#b,$TX[0]#b +#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d +#md5# add \$$K[$i],$a +#md5# xor $d,$tmp +#rc4# movz $TX[0]#b,$TX[0]#d +#rc4# movl $TY#d,4*$k($XX[1]) +#md5# add $tmp,$a +#rc4# add $TX[1]#b,$YY#b +#md5# rol \$$rot0[$j%4],$a +#md5# mov `$j==15?"$b":"$c"`,$tmp # forward reference +#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n +#md5# add $b,$a +___ + $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); + mov $YY,$XX[1] + xor $YY,$YY # keyword to partial register + mov $XX[1]#b,$YY#b + lea ($dat,$XX[0],4),$XX[1] +___ + $code.=<<___ if ($rc4 && $j==15); + psllq \$8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 +___ +} +sub R1 { + my ($i,$a,$b,$c,$d)=@_; + my @rot1=(5,9,14,20); + my $j=$i%16; + my $k=$i%$MOD; + my $xmm="%xmm".($j&1); + $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15); + $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); + $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); + $code.=<<___; +#rc4# movl ($dat,$YY,4),$TY#d +#md5# xor $b,$tmp +#rc4# movl $TX[0]#d,($dat,$YY,4) +#md5# and $d,$tmp +#md5# add 4*`((1+5*$j)%16)`($inp),$a +#rc4# add $TY#b,$TX[0]#b +#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d +#md5# add \$$K[$i],$a +#md5# xor $c,$tmp +#rc4# movz $TX[0]#b,$TX[0]#d +#rc4# movl $TY#d,4*$k($XX[1]) +#md5# add $tmp,$a +#rc4# add $TX[1]#b,$YY#b +#md5# rol \$$rot1[$j%4],$a +#md5# mov `$j==15?"$c":"$b"`,$tmp # forward reference +#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n +#md5# add $b,$a +___ + $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); + mov $YY,$XX[1] + xor $YY,$YY # keyword to partial register + mov $XX[1]#b,$YY#b + lea ($dat,$XX[0],4),$XX[1] +___ + $code.=<<___ if ($rc4 && $j==15); + psllq \$8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 +___ +} +sub R2 { + my ($i,$a,$b,$c,$d)=@_; + my @rot2=(4,11,16,23); + my $j=$i%16; + my $k=$i%$MOD; + my $xmm="%xmm".($j&1); + $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15); + $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); + $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); + $code.=<<___; +#rc4# movl ($dat,$YY,4),$TY#d +#md5# xor $c,$tmp +#rc4# movl $TX[0]#d,($dat,$YY,4) +#md5# xor $b,$tmp +#md5# add 4*`((5+3*$j)%16)`($inp),$a +#rc4# add $TY#b,$TX[0]#b +#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d +#md5# add \$$K[$i],$a +#rc4# movz $TX[0]#b,$TX[0]#d +#md5# add $tmp,$a +#rc4# movl $TY#d,4*$k($XX[1]) +#rc4# add $TX[1]#b,$YY#b +#md5# rol \$$rot2[$j%4],$a +#md5# mov `$j==15?"\\\$-1":"$c"`,$tmp # forward reference +#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n +#md5# add $b,$a +___ + $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); + mov $YY,$XX[1] + xor $YY,$YY # keyword to partial register + mov $XX[1]#b,$YY#b + lea ($dat,$XX[0],4),$XX[1] +___ + $code.=<<___ if ($rc4 && $j==15); + psllq \$8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 +___ +} +sub R3 { + my ($i,$a,$b,$c,$d)=@_; + my @rot3=(6,10,15,21); + my $j=$i%16; + my $k=$i%$MOD; + my $xmm="%xmm".($j&1); + $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15); + $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); + $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); + $code.=<<___; +#rc4# movl ($dat,$YY,4),$TY#d +#md5# xor $d,$tmp +#rc4# movl $TX[0]#d,($dat,$YY,4) +#md5# or $b,$tmp +#md5# add 4*`((7*$j)%16)`($inp),$a +#rc4# add $TY#b,$TX[0]#b +#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d +#md5# add \$$K[$i],$a +#rc4# movz $TX[0]#b,$TX[0]#d +#md5# xor $c,$tmp +#rc4# movl $TY#d,4*$k($XX[1]) +#md5# add $tmp,$a +#rc4# add $TX[1]#b,$YY#b +#md5# rol \$$rot3[$j%4],$a +#md5# mov \$-1,$tmp # forward reference +#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n +#md5# add $b,$a +___ + $code.=<<___ if ($rc4 && $j==15); + mov $XX[0],$XX[1] + xor $XX[0],$XX[0] # keyword to partial register + mov $XX[1]#b,$XX[0]#b + mov $YY,$XX[1] + xor $YY,$YY # keyword to partial register + mov $XX[1]#b,$YY#b + lea ($dat,$XX[0],4),$XX[1] + psllq \$8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 +___ +} + +my $i=0; +for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } +for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } +for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } +for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } + +$code.=<<___; +#md5# add 0*4(%rsp),$V[0] # accumulate hash value +#md5# add 1*4(%rsp),$V[1] +#md5# add 2*4(%rsp),$V[2] +#md5# add 3*4(%rsp),$V[3] + +#rc4# movdqu %xmm2,($out,$in0) # write RC4 output +#rc4# movdqu %xmm3,16($out,$in0) +#rc4# movdqu %xmm4,32($out,$in0) +#rc4# movdqu %xmm5,48($out,$in0) +#md5# lea 64($inp),$inp +#rc4# lea 64($in0),$in0 + cmp 16(%rsp),$inp # are we done? + jb .Loop + +#md5# mov 24(%rsp),$len # restore pointer to MD5_CTX +#rc4# sub $TX[0]#b,$YY#b # correct $YY +#md5# mov $V[0],0*4($len) # write MD5_CTX +#md5# mov $V[1],1*4($len) +#md5# mov $V[2],2*4($len) +#md5# mov $V[3],3*4($len) +___ +$code.=<<___ if ($rc4 && (!$md5 || $D)); + mov 32(%rsp),$len # restore original $len + and \$63,$len # remaining bytes + jnz .Loop1 + jmp .Ldone + +.align 16 +.Loop1: + add $TX[0]#b,$YY#b + movl ($dat,$YY,4),$TY#d + movl $TX[0]#d,($dat,$YY,4) + movl $TY#d,($dat,$XX[0],4) + add $TY#b,$TX[0]#b + inc $XX[0]#b + movl ($dat,$TX[0],4),$TY#d + movl ($dat,$XX[0],4),$TX[0]#d + xorb ($in0),$TY#b + movb $TY#b,($out,$in0) + lea 1($in0),$in0 + dec $len + jnz .Loop1 + +.Ldone: +___ +$code.=<<___; +#rc4# sub \$1,$XX[0]#b +#rc4# movl $XX[0]#d,-8($dat) +#rc4# movl $YY#d,-4($dat) + + mov 40(%rsp),%r15 + mov 48(%rsp),%r14 + mov 56(%rsp),%r13 + mov 64(%rsp),%r12 + mov 72(%rsp),%rbp + mov 80(%rsp),%rbx + lea 88(%rsp),%rsp +.Lepilogue: +.Labort: + ret +.size $func,.-$func +___ + +if ($rc4 && $D) { # sole purpose of this section is to provide + # option to use the generated module as drop-in + # replacement for rc4-x86_64.pl for debugging + # and testing purposes... +my ($idx,$ido)=("%r8","%r9"); +my ($dat,$len,$inp)=("%rdi","%rsi","%rdx"); + +$code.=<<___; +.globl RC4_set_key +.type RC4_set_key,\@function,3 +.align 16 +RC4_set_key: + lea 8($dat),$dat + lea ($inp,$len),$inp + neg $len + mov $len,%rcx + xor %eax,%eax + xor $ido,$ido + xor %r10,%r10 + xor %r11,%r11 + jmp .Lw1stloop + +.align 16 +.Lw1stloop: + mov %eax,($dat,%rax,4) + add \$1,%al + jnc .Lw1stloop + + xor $ido,$ido + xor $idx,$idx +.align 16 +.Lw2ndloop: + mov ($dat,$ido,4),%r10d + add ($inp,$len,1),$idx#b + add %r10b,$idx#b + add \$1,$len + mov ($dat,$idx,4),%r11d + cmovz %rcx,$len + mov %r10d,($dat,$idx,4) + mov %r11d,($dat,$ido,4) + add \$1,$ido#b + jnc .Lw2ndloop + + xor %eax,%eax + mov %eax,-8($dat) + mov %eax,-4($dat) + ret +.size RC4_set_key,.-RC4_set_key + +.globl RC4_options +.type RC4_options,\@abi-omnipotent +.align 16 +RC4_options: + lea .Lopts(%rip),%rax + ret +.align 64 +.Lopts: +.asciz "rc4(64x,int)" +.align 64 +.size RC4_options,.-RC4_options +___ +} +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +my $rec="%rcx"; +my $frame="%rdx"; +my $context="%r8"; +my $disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lbody(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lbody + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + lea .Lepilogue(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov 40(%rax),%r15 + mov 48(%rax),%r14 + mov 56(%rax),%r13 + mov 64(%rax),%r12 + mov 72(%rax),%rbp + mov 80(%rax),%rbx + lea 88(%rax),%rax + + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R12 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_$func + .rva .LSEH_end_$func + .rva .LSEH_info_$func + +.section .xdata +.align 8 +.LSEH_info_$func: + .byte 9,0,0,0 + .rva se_handler +___ +} + +sub reg_part { +my ($reg,$conv)=@_; + if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } + elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } + elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } + elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } + return $reg; +} + +$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/pinsrw\s+\$0,/movd /gm; + +$code =~ s/#md5#//gm if ($md5); +$code =~ s/#rc4#//gm if ($rc4); + +print $code; + +close STDOUT; diff --git a/app/openssl/crypto/rc4/asm/rc4-parisc.pl b/app/openssl/crypto/rc4/asm/rc4-parisc.pl new file mode 100644 index 00000000..ad7e6565 --- /dev/null +++ b/app/openssl/crypto/rc4/asm/rc4-parisc.pl @@ -0,0 +1,314 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# RC4 for PA-RISC. + +# June 2009. +# +# Performance is 33% better than gcc 3.2 generated code on PA-7100LC. +# For reference, [4x] unrolled loop is >40% faster than folded one. +# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement +# is believed to be not sufficient to justify the effort... +# +# Special thanks to polarhome.com for providing HP-UX account. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; +} else { + $LEVEL ="1.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; +} + +$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker + # [+ argument transfer] +$SZ=1; # defaults to RC4_CHAR +if (open CONF,"<${dir}../../opensslconf.h") { + while() { + if (m/#\s*define\s+RC4_INT\s+(.*)/) { + $SZ = ($1=~/char$/) ? 1 : 4; + last; + } + } + close CONF; +} + +if ($SZ==1) { # RC4_CHAR + $LD="ldb"; + $LDX="ldbx"; + $MKX="addl"; + $ST="stb"; +} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC) + $LD="ldw"; + $LDX="ldwx,s"; + $MKX="sh2addl"; + $ST="stw"; +} + +$key="%r26"; +$len="%r25"; +$inp="%r24"; +$out="%r23"; + +@XX=("%r19","%r20"); +@TX=("%r21","%r22"); +$YY="%r28"; +$TY="%r29"; + +$acc="%r1"; +$ix="%r2"; +$iy="%r3"; +$dat0="%r4"; +$dat1="%r5"; +$rem="%r6"; +$mask="%r31"; + +sub unrolledloopbody { +for ($i=0;$i<4;$i++) { +$code.=<<___; + ldo 1($XX[0]),$XX[1] + `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)` + and $mask,$XX[1],$XX[1] + $LDX $YY($key),$TY + $MKX $YY,$key,$ix + $LDX $XX[1]($key),$TX[1] + $MKX $XX[0],$key,$iy + $ST $TX[0],0($ix) + comclr,<> $XX[1],$YY,%r0 ; conditional + copy $TX[0],$TX[1] ; move + `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)` + $ST $TY,0($iy) + addl $TX[0],$TY,$TY + addl $TX[1],$YY,$YY + and $mask,$TY,$TY + and $mask,$YY,$YY +___ +push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers +} } + +sub foldedloop { +my ($label,$count)=@_; +$code.=<<___; +$label + $MKX $YY,$key,$iy + $LDX $YY($key),$TY + $MKX $XX[0],$key,$ix + $ST $TX[0],0($iy) + ldo 1($XX[0]),$XX[0] + $ST $TY,0($ix) + addl $TX[0],$TY,$TY + ldbx $inp($out),$dat1 + and $mask,$TY,$TY + and $mask,$XX[0],$XX[0] + $LDX $TY($key),$acc + $LDX $XX[0]($key),$TX[0] + ldo 1($out),$out + xor $dat1,$acc,$acc + addl $TX[0],$YY,$YY + stb $acc,-1($out) + addib,<> -1,$count,$label ; $count is always small + and $mask,$YY,$YY +___ +} + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR +RC4 + .PROC + .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + + cmpib,*= 0,$len,L\$abort + sub $inp,$out,$inp ; distance between $inp and $out + + $LD `0*$SZ`($key),$XX[0] + $LD `1*$SZ`($key),$YY + ldo `2*$SZ`($key),$key + + ldi 0xff,$mask + ldi 3,$dat0 + + ldo 1($XX[0]),$XX[0] ; warm up loop + and $mask,$XX[0],$XX[0] + $LDX $XX[0]($key),$TX[0] + addl $TX[0],$YY,$YY + cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother? + and $mask,$YY,$YY + + and,<> $out,$dat0,$rem ; is $out aligned? + b L\$alignedout + subi 4,$rem,$rem + sub $len,$rem,$len +___ +&foldedloop("L\$alignout",$rem); # process till $out is aligned + +$code.=<<___; +L\$alignedout ; $len is at least 4 here + and,<> $inp,$dat0,$acc ; is $inp aligned? + b L\$oop4 + sub $inp,$acc,$rem ; align $inp + + sh3addl $acc,%r0,$acc + subi 32,$acc,$acc + mtctl $acc,%cr11 ; load %sar with vshd align factor + ldwx $rem($out),$dat0 + ldo 4($rem),$rem +L\$oop4misalignedinp +___ +&unrolledloopbody(); +$code.=<<___; + $LDX $TY($key),$ix + ldwx $rem($out),$dat1 + ldo -4($len),$len + or $ix,$acc,$acc ; last piece, no need to dep + vshd $dat0,$dat1,$iy ; align data + copy $dat1,$dat0 + xor $iy,$acc,$acc + stw $acc,0($out) + cmpib,*<< 3,$len,L\$oop4misalignedinp + ldo 4($out),$out + cmpib,*= 0,$len,L\$done + nop + b L\$oop1 + nop + + .ALIGN 8 +L\$oop4 +___ +&unrolledloopbody(); +$code.=<<___; + $LDX $TY($key),$ix + ldwx $inp($out),$dat0 + ldo -4($len),$len + or $ix,$acc,$acc ; last piece, no need to dep + xor $dat0,$acc,$acc + stw $acc,0($out) + cmpib,*<< 3,$len,L\$oop4 + ldo 4($out),$out + cmpib,*= 0,$len,L\$done + nop +___ +&foldedloop("L\$oop1",$len); +$code.=<<___; +L\$done + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 + ldo -1($XX[0]),$XX[0] ; chill out loop + sub $YY,$TX[0],$YY + and $mask,$XX[0],$XX[0] + and $mask,$YY,$YY + $ST $XX[0],`-2*$SZ`($key) + $ST $YY,`-1*$SZ`($key) + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 +L\$abort + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND +___ + +$code.=<<___; + + .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR + .ALIGN 8 +private_RC4_set_key + .PROC + .CALLINFO NO_CALLS + .ENTRY + $ST %r0,`0*$SZ`($key) + $ST %r0,`1*$SZ`($key) + ldo `2*$SZ`($key),$key + copy %r0,@XX[0] +L\$1st + $ST @XX[0],0($key) + ldo 1(@XX[0]),@XX[0] + bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256 + ldo $SZ($key),$key + + ldo `-256*$SZ`($key),$key ; rewind $key + addl $len,$inp,$inp ; $inp to point at the end + sub %r0,$len,%r23 ; inverse index + copy %r0,@XX[0] + copy %r0,@XX[1] + ldi 0xff,$mask + +L\$2nd + $LDX @XX[0]($key),@TX[0] + ldbx %r23($inp),@TX[1] + addi,nuv 1,%r23,%r23 ; increment and conditional + sub %r0,$len,%r23 ; inverse index + addl @TX[0],@XX[1],@XX[1] + addl @TX[1],@XX[1],@XX[1] + and $mask,@XX[1],@XX[1] + $MKX @XX[0],$key,$TY + $LDX @XX[1]($key),@TX[1] + $MKX @XX[1],$key,$YY + ldo 1(@XX[0]),@XX[0] + $ST @TX[0],0($YY) + bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256 + $ST @TX[1],0($TY) + + bv,n (%r2) + .EXIT + nop + .PROCEND + + .EXPORT RC4_options,ENTRY + .ALIGN 8 +RC4_options + .PROC + .CALLINFO NO_CALLS + .ENTRY + blr %r0,%r28 + ldi 3,%r1 +L\$pic + andcm %r28,%r1,%r28 + bv (%r2) + .EXIT + ldo L\$opts-L\$pic(%r28),%r28 + .PROCEND + .ALIGN 8 +L\$opts + .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)" + .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by " +___ +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); +$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); + +print $code; +close STDOUT; diff --git a/app/openssl/crypto/rc4/asm/rc4-s390x.pl b/app/openssl/crypto/rc4/asm/rc4-s390x.pl index 96681fa0..7528ece1 100644 --- a/app/openssl/crypto/rc4/asm/rc4-s390x.pl +++ b/app/openssl/crypto/rc4/asm/rc4-s390x.pl @@ -13,6 +13,29 @@ # "cluster" Address Generation Interlocks, so that one pipeline stall # resolves several dependencies. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 50% better than code generated by gcc 4.3. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + $rp="%r14"; $sp="%r15"; $code=<<___; @@ -39,7 +62,12 @@ $code.=<<___; .type RC4,\@function .align 64 RC4: - stmg %r6,%r11,48($sp) + stm${g} %r6,%r11,6*$SIZE_T($sp) +___ +$code.=<<___ if ($flavour =~ /3[12]/); + llgfr $len,$len +___ +$code.=<<___; llgc $XX[0],0($key) llgc $YY,1($key) la $XX[0],1($XX[0]) @@ -90,7 +118,7 @@ $code.=<<___; xgr $acc,$TX[1] stg $acc,0($out) la $out,8($out) - brct $cnt,.Loop8 + brctg $cnt,.Loop8 .Lshort: lghi $acc,7 @@ -122,7 +150,7 @@ $code.=<<___; ahi $XX[0],-1 stc $XX[0],0($key) stc $YY,1($key) - lmg %r6,%r11,48($sp) + lm${g} %r6,%r11,6*$SIZE_T($sp) br $rp .size RC4,.-RC4 .string "RC4 for s390x, CRYPTOGAMS by " @@ -143,11 +171,11 @@ $ikey="%r7"; $iinp="%r8"; $code.=<<___; -.globl RC4_set_key -.type RC4_set_key,\@function +.globl private_RC4_set_key +.type private_RC4_set_key,\@function .align 64 -RC4_set_key: - stmg %r6,%r8,48($sp) +private_RC4_set_key: + stm${g} %r6,%r8,6*$SIZE_T($sp) lhi $cnt,256 la $idx,0(%r0) sth $idx,0($key) @@ -180,9 +208,9 @@ RC4_set_key: la $iinp,0(%r0) j .L2ndloop .Ldone: - lmg %r6,%r8,48($sp) + lm${g} %r6,%r8,6*$SIZE_T($sp) br $rp -.size RC4_set_key,.-RC4_set_key +.size private_RC4_set_key,.-private_RC4_set_key ___ } @@ -203,3 +231,4 @@ RC4_options: ___ print $code; +close STDOUT; # force flush diff --git a/app/openssl/crypto/rc4/asm/rc4-x86_64.S b/app/openssl/crypto/rc4/asm/rc4-x86_64.S new file mode 100644 index 00000000..af161582 --- /dev/null +++ b/app/openssl/crypto/rc4/asm/rc4-x86_64.S @@ -0,0 +1,615 @@ +.text + + +.globl RC4 +.type RC4,@function +.align 16 +RC4: orq %rsi,%rsi + jne .Lentry + .byte 0xf3,0xc3 +.Lentry: + pushq %rbx + pushq %r12 + pushq %r13 +.Lprologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl + cmpl $-1,256(%rdi) + je .LRC4_CHAR + movl OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx + incb %r10b + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz .Lloop1 + btl $30,%r8d + jc .Lintel + andq $7,%rbx + leaq 1(%r10),%rsi + jz .Loop8 + subq %rbx,%r11 +.Loop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop8_warmup + + leaq 1(%r10),%rsi + jmp .Loop8 +.align 16 +.Loop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r13,%r12,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz .Loop8 + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + testq $-32,%r11 + jz .Lloop1 + andq $15,%rbx + jz .Loop16_is_hot + subq %rbx,%r11 +.Loop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +.Loop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp .Loop16_enter +.align 16 +.Loop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 +.Loop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz .Loop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lloop1: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %r11 + jnz .Lloop1 + jmp .Lexit + +.align 16 +.LRC4_CHAR: + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 + jz .Lcloop1 + jmp .Lcloop8 +.align 16 +.Lcloop8: + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov0 + movq %rax,%rbx +.Lcmov0: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov1 + movq %rbx,%rax +.Lcmov1: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov2 + movq %rax,%rbx +.Lcmov2: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov3 + movq %rbx,%rax +.Lcmov3: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov4 + movq %rax,%rbx +.Lcmov4: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov5 + movq %rbx,%rax +.Lcmov5: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov6 + movq %rax,%rbx +.Lcmov6: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov7 + movq %rbx,%rax +.Lcmov7: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 + jnz .Lcloop8 + cmpq $0,%r11 + jne .Lcloop1 + jmp .Lexit +.align 16 +.Lcloop1: + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 + jnz .Lcloop1 + jmp .Lexit + +.align 16 +.Lexit: + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) + + movq (%rsp),%r13 + movq 8(%rsp),%r12 + movq 16(%rsp),%rbx + addq $24,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size RC4,.-RC4 +.globl private_RC4_set_key +.type private_RC4_set_key,@function +.align 16 +private_RC4_set_key: + leaq 8(%rdi),%rdi + leaq (%rdx,%rsi,1),%rdx + negq %rsi + movq %rsi,%rcx + xorl %eax,%eax + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + + movl OPENSSL_ia32cap_P(%rip),%r8d + btl $20,%r8d + jc .Lc1stloop + jmp .Lw1stloop + +.align 16 +.Lw1stloop: + movl %eax,(%rdi,%rax,4) + addb $1,%al + jnc .Lw1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lw2ndloop: + movl (%rdi,%r9,4),%r10d + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movl (%rdi,%r8,4),%r11d + cmovzq %rcx,%rsi + movl %r10d,(%rdi,%r8,4) + movl %r11d,(%rdi,%r9,4) + addb $1,%r9b + jnc .Lw2ndloop + jmp .Lexit_key + +.align 16 +.Lc1stloop: + movb %al,(%rdi,%rax,1) + addb $1,%al + jnc .Lc1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lc2ndloop: + movb (%rdi,%r9,1),%r10b + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movb (%rdi,%r8,1),%r11b + jnz .Lcnowrap + movq %rcx,%rsi +.Lcnowrap: + movb %r10b,(%rdi,%r8,1) + movb %r11b,(%rdi,%r9,1) + addb $1,%r9b + jnc .Lc2ndloop + movl $-1,256(%rdi) + +.align 16 +.Lexit_key: + xorl %eax,%eax + movl %eax,-8(%rdi) + movl %eax,-4(%rdi) + .byte 0xf3,0xc3 +.size private_RC4_set_key,.-private_RC4_set_key + +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: + leaq .Lopts(%rip),%rax + movl OPENSSL_ia32cap_P(%rip),%edx + btl $20,%edx + jc .L8xchar + btl $30,%edx + jnc .Ldone + addq $25,%rax + .byte 0xf3,0xc3 +.L8xchar: + addq $12,%rax +.Ldone: + .byte 0xf3,0xc3 +.align 64 +.Lopts: +.byte 114,99,52,40,56,120,44,105,110,116,41,0 +.byte 114,99,52,40,56,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,49,54,120,44,105,110,116,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.size RC4_options,.-RC4_options diff --git a/app/openssl/crypto/rc4/asm/rc4-x86_64.pl b/app/openssl/crypto/rc4/asm/rc4-x86_64.pl old mode 100755 new mode 100644 index 677be5fe..20722d3e --- a/app/openssl/crypto/rc4/asm/rc4-x86_64.pl +++ b/app/openssl/crypto/rc4/asm/rc4-x86_64.pl @@ -7,6 +7,8 @@ # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # +# July 2004 +# # 2.22x RC4 tune-up:-) It should be noted though that my hand [as in # "hand-coded assembler"] doesn't stand for the whole improvement # coefficient. It turned out that eliminating RC4_CHAR from config @@ -19,6 +21,8 @@ # to operate on partial registers, it turned out to be the best bet. # At least for AMD... How IA32E would perform remains to be seen... +# November 2004 +# # As was shown by Marc Bevand reordering of couple of load operations # results in even higher performance gain of 3.3x:-) At least on # Opteron... For reference, 1x in this case is RC4_CHAR C-code @@ -26,6 +30,8 @@ # Latter means that if you want to *estimate* what to expect from # *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz. +# November 2004 +# # Intel P4 EM64T core was found to run the AMD64 code really slow... # The only way to achieve comparable performance on P4 was to keep # RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to @@ -33,10 +39,14 @@ # on either AMD and Intel platforms, I implement both cases. See # rc4_skey.c for further details... +# April 2005 +# # P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing # those with add/sub results in 50% performance improvement of folded # loop... +# May 2005 +# # As was shown by Zou Nanhai loop unrolling can improve Intel EM64T # performance by >30% [unlike P4 32-bit case that is]. But this is # provided that loads are reordered even more aggressively! Both code @@ -46,10 +56,12 @@ # achieves respectful 432MBps on 2.8GHz processor now. For reference. # If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than # RC4_INT code-path. While if executed on Opteron, it's only 25% -# slower than the RC4_INT one [meaning that if CPU µ-arch detection +# slower than the RC4_INT one [meaning that if CPU µ-arch detection # is not implemented, then this final RC4_CHAR code-path should be # preferred, as it provides better *all-round* performance]. +# March 2007 +# # Intel Core2 was observed to perform poorly on both code paths:-( It # apparently suffers from some kind of partial register stall, which # occurs in 64-bit mode only [as virtually identical 32-bit loop was @@ -58,6 +70,37 @@ # fit for Core2 and therefore the code was modified to skip cloop8 on # this CPU. +# May 2010 +# +# Intel Westmere was observed to perform suboptimally. Adding yet +# another movzb to cloop1 improved performance by almost 50%! Core2 +# performance is improved too, but nominally... + +# May 2011 +# +# The only code path that was not modified is P4-specific one. Non-P4 +# Intel code path optimization is heavily based on submission by Maxim +# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used +# some of the ideas even in attempt to optmize the original RC4_INT +# code path... Current performance in cycles per processed byte (less +# is better) and improvement coefficients relative to previous +# version of this module are: +# +# Opteron 5.3/+0%(*) +# P4 6.5 +# Core2 6.2/+15%(**) +# Westmere 4.2/+60% +# Sandy Bridge 4.2/+120% +# Atom 9.3/+80% +# +# (*) But corresponding loop has less instructions, which should have +# positive effect on upcoming Bulldozer, which has one less ALU. +# For reference, Intel code runs at 6.8 cpb rate on Opteron. +# (**) Note that Core2 result is ~15% lower than corresponding result +# for 32-bit code, meaning that it's possible to improve it, +# but more than likely at the cost of the others (see rc4-586.pl +# to get the idea)... + $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } @@ -69,20 +112,18 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $dat="%rdi"; # arg1 $len="%rsi"; # arg2 $inp="%rdx"; # arg3 $out="%rcx"; # arg4 -@XX=("%r8","%r10"); -@TX=("%r9","%r11"); -$YY="%r12"; -$TY="%r13"; - +{ $code=<<___; .text +.extern OPENSSL_ia32cap_P .globl RC4 .type RC4,\@function,4 @@ -95,48 +136,173 @@ RC4: or $len,$len push %r12 push %r13 .Lprologue: + mov $len,%r11 + mov $inp,%r12 + mov $out,%r13 +___ +my $len="%r11"; # reassign input arguments +my $inp="%r12"; +my $out="%r13"; - add \$8,$dat - movl -8($dat),$XX[0]#d - movl -4($dat),$YY#d +my @XX=("%r10","%rsi"); +my @TX=("%rax","%rbx"); +my $YY="%rcx"; +my $TY="%rdx"; + +$code.=<<___; + xor $XX[0],$XX[0] + xor $YY,$YY + + lea 8($dat),$dat + mov -8($dat),$XX[0]#b + mov -4($dat),$YY#b cmpl \$-1,256($dat) je .LRC4_CHAR + mov OPENSSL_ia32cap_P(%rip),%r8d + xor $TX[1],$TX[1] inc $XX[0]#b + sub $XX[0],$TX[1] + sub $inp,$out movl ($dat,$XX[0],4),$TX[0]#d - test \$-8,$len + test \$-16,$len jz .Lloop1 - jmp .Lloop8 + bt \$30,%r8d # Intel CPU? + jc .Lintel + and \$7,$TX[1] + lea 1($XX[0]),$XX[1] + jz .Loop8 + sub $TX[1],$len +.Loop8_warmup: + add $TX[0]#b,$YY#b + movl ($dat,$YY,4),$TY#d + movl $TX[0]#d,($dat,$YY,4) + movl $TY#d,($dat,$XX[0],4) + add $TY#b,$TX[0]#b + inc $XX[0]#b + movl ($dat,$TX[0],4),$TY#d + movl ($dat,$XX[0],4),$TX[0]#d + xorb ($inp),$TY#b + movb $TY#b,($out,$inp) + lea 1($inp),$inp + dec $TX[1] + jnz .Loop8_warmup + + lea 1($XX[0]),$XX[1] + jmp .Loop8 .align 16 -.Lloop8: +.Loop8: ___ for ($i=0;$i<8;$i++) { +$code.=<<___ if ($i==7); + add \$8,$XX[1]#b +___ $code.=<<___; add $TX[0]#b,$YY#b - mov $XX[0],$XX[1] movl ($dat,$YY,4),$TY#d - ror \$8,%rax # ror is redundant when $i=0 - inc $XX[1]#b - movl ($dat,$XX[1],4),$TX[1]#d - cmp $XX[1],$YY movl $TX[0]#d,($dat,$YY,4) - cmove $TX[0],$TX[1] - movl $TY#d,($dat,$XX[0],4) + movl `4*($i==7?-1:$i)`($dat,$XX[1],4),$TX[1]#d + ror \$8,%r8 # ror is redundant when $i=0 + movl $TY#d,4*$i($dat,$XX[0],4) add $TX[0]#b,$TY#b - movb ($dat,$TY,4),%al + movb ($dat,$TY,4),%r8b ___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers +push(@TX,shift(@TX)); #push(@XX,shift(@XX)); # "rotate" registers } $code.=<<___; - ror \$8,%rax + add \$8,$XX[0]#b + ror \$8,%r8 sub \$8,$len - xor ($inp),%rax - add \$8,$inp - mov %rax,($out) - add \$8,$out + xor ($inp),%r8 + mov %r8,($out,$inp) + lea 8($inp),$inp test \$-8,$len - jnz .Lloop8 + jnz .Loop8 + cmp \$0,$len + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + test \$-32,$len + jz .Lloop1 + and \$15,$TX[1] + jz .Loop16_is_hot + sub $TX[1],$len +.Loop16_warmup: + add $TX[0]#b,$YY#b + movl ($dat,$YY,4),$TY#d + movl $TX[0]#d,($dat,$YY,4) + movl $TY#d,($dat,$XX[0],4) + add $TY#b,$TX[0]#b + inc $XX[0]#b + movl ($dat,$TX[0],4),$TY#d + movl ($dat,$XX[0],4),$TX[0]#d + xorb ($inp),$TY#b + movb $TY#b,($out,$inp) + lea 1($inp),$inp + dec $TX[1] + jnz .Loop16_warmup + + mov $YY,$TX[1] + xor $YY,$YY + mov $TX[1]#b,$YY#b + +.Loop16_is_hot: + lea ($dat,$XX[0],4),$XX[1] +___ +sub RC4_loop { + my $i=shift; + my $j=$i<0?0:$i; + my $xmm="%xmm".($j&1); + + $code.=" add \$16,$XX[0]#b\n" if ($i==15); + $code.=" movdqu ($inp),%xmm2\n" if ($i==15); + $code.=" add $TX[0]#b,$YY#b\n" if ($i<=0); + $code.=" movl ($dat,$YY,4),$TY#d\n"; + $code.=" pxor %xmm0,%xmm2\n" if ($i==0); + $code.=" psllq \$8,%xmm1\n" if ($i==0); + $code.=" pxor $xmm,$xmm\n" if ($i<=1); + $code.=" movl $TX[0]#d,($dat,$YY,4)\n"; + $code.=" add $TY#b,$TX[0]#b\n"; + $code.=" movl `4*($j+1)`($XX[1]),$TX[1]#d\n" if ($i<15); + $code.=" movz $TX[0]#b,$TX[0]#d\n"; + $code.=" movl $TY#d,4*$j($XX[1])\n"; + $code.=" pxor %xmm1,%xmm2\n" if ($i==0); + $code.=" lea ($dat,$XX[0],4),$XX[1]\n" if ($i==15); + $code.=" add $TX[1]#b,$YY#b\n" if ($i<15); + $code.=" pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n"; + $code.=" movdqu %xmm2,($out,$inp)\n" if ($i==0); + $code.=" lea 16($inp),$inp\n" if ($i==0); + $code.=" movl ($XX[1]),$TX[1]#d\n" if ($i==15); +} + RC4_loop(-1); +$code.=<<___; + jmp .Loop16_enter +.align 16 +.Loop16: +___ + +for ($i=0;$i<16;$i++) { + $code.=".Loop16_enter:\n" if ($i==1); + RC4_loop($i); + push(@TX,shift(@TX)); # "rotate" registers +} +$code.=<<___; + mov $YY,$TX[1] + xor $YY,$YY # keyword to partial register + sub \$16,$len + mov $TX[1]#b,$YY#b + test \$-16,$len + jnz .Loop16 + + psllq \$8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,($out,$inp) + lea 16($inp),$inp + cmp \$0,$len jne .Lloop1 jmp .Lexit @@ -152,9 +318,8 @@ $code.=<<___; movl ($dat,$TX[0],4),$TY#d movl ($dat,$XX[0],4),$TX[0]#d xorb ($inp),$TY#b - inc $inp - movb $TY#b,($out) - inc $out + movb $TY#b,($out,$inp) + lea 1($inp),$inp dec $len jnz .Lloop1 jmp .Lexit @@ -165,13 +330,11 @@ $code.=<<___; movzb ($dat,$XX[0]),$TX[0]#d test \$-8,$len jz .Lcloop1 - cmpl \$0,260($dat) - jnz .Lcloop1 jmp .Lcloop8 .align 16 .Lcloop8: - mov ($inp),%eax - mov 4($inp),%ebx + mov ($inp),%r8d + mov 4($inp),%r9d ___ # unroll 2x4-wise, because 64-bit rotates kill Intel P4... for ($i=0;$i<4;$i++) { @@ -188,8 +351,8 @@ $code.=<<___; mov $TX[0],$TX[1] .Lcmov$i: add $TX[0]#b,$TY#b - xor ($dat,$TY),%al - ror \$8,%eax + xor ($dat,$TY),%r8b + ror \$8,%r8d ___ push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers } @@ -207,16 +370,16 @@ $code.=<<___; mov $TX[0],$TX[1] .Lcmov$i: add $TX[0]#b,$TY#b - xor ($dat,$TY),%bl - ror \$8,%ebx + xor ($dat,$TY),%r9b + ror \$8,%r9d ___ push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers } $code.=<<___; lea -8($len),$len - mov %eax,($out) + mov %r8d,($out) lea 8($inp),$inp - mov %ebx,4($out) + mov %r9d,4($out) lea 8($out),$out test \$-8,$len @@ -229,6 +392,7 @@ $code.=<<___; .align 16 .Lcloop1: add $TX[0]#b,$YY#b + movzb $YY#b,$YY#d movzb ($dat,$YY),$TY#d movb $TX[0]#b,($dat,$YY) movb $TY#b,($dat,$XX[0]) @@ -260,16 +424,16 @@ $code.=<<___; ret .size RC4,.-RC4 ___ +} $idx="%r8"; $ido="%r9"; $code.=<<___; -.extern OPENSSL_ia32cap_P -.globl RC4_set_key -.type RC4_set_key,\@function,3 +.globl private_RC4_set_key +.type private_RC4_set_key,\@function,3 .align 16 -RC4_set_key: +private_RC4_set_key: lea 8($dat),$dat lea ($inp,$len),$inp neg $len @@ -280,12 +444,9 @@ RC4_set_key: xor %r11,%r11 mov OPENSSL_ia32cap_P(%rip),$idx#d - bt \$20,$idx#d - jnc .Lw1stloop - bt \$30,$idx#d - setc $ido#b - mov $ido#d,260($dat) - jmp .Lc1stloop + bt \$20,$idx#d # RC4_CHAR? + jc .Lc1stloop + jmp .Lw1stloop .align 16 .Lw1stloop: @@ -339,7 +500,7 @@ RC4_set_key: mov %eax,-8($dat) mov %eax,-4($dat) ret -.size RC4_set_key,.-RC4_set_key +.size private_RC4_set_key,.-private_RC4_set_key .globl RC4_options .type RC4_options,\@abi-omnipotent @@ -348,18 +509,20 @@ RC4_options: lea .Lopts(%rip),%rax mov OPENSSL_ia32cap_P(%rip),%edx bt \$20,%edx - jnc .Ldone - add \$12,%rax + jc .L8xchar bt \$30,%edx jnc .Ldone - add \$13,%rax + add \$25,%rax + ret +.L8xchar: + add \$12,%rax .Ldone: ret .align 64 .Lopts: .asciz "rc4(8x,int)" .asciz "rc4(8x,char)" -.asciz "rc4(1x,char)" +.asciz "rc4(16x,int)" .asciz "RC4 for x86_64, CRYPTOGAMS by " .align 64 .size RC4_options,.-RC4_options @@ -482,22 +645,32 @@ key_se_handler: .rva .LSEH_end_RC4 .rva .LSEH_info_RC4 - .rva .LSEH_begin_RC4_set_key - .rva .LSEH_end_RC4_set_key - .rva .LSEH_info_RC4_set_key + .rva .LSEH_begin_private_RC4_set_key + .rva .LSEH_end_private_RC4_set_key + .rva .LSEH_info_private_RC4_set_key .section .xdata .align 8 .LSEH_info_RC4: .byte 9,0,0,0 .rva stream_se_handler -.LSEH_info_RC4_set_key: +.LSEH_info_private_RC4_set_key: .byte 9,0,0,0 .rva key_se_handler ___ } -$code =~ s/#([bwd])/$1/gm; +sub reg_part { +my ($reg,$conv)=@_; + if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } + elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } + elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } + elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } + return $reg; +} + +$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; +$code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; diff --git a/app/openssl/crypto/rc4/rc4.h b/app/openssl/crypto/rc4/rc4.h index 29d1accc..88ceb46b 100644 --- a/app/openssl/crypto/rc4/rc4.h +++ b/app/openssl/crypto/rc4/rc4.h @@ -79,6 +79,7 @@ typedef struct rc4_key_st const char *RC4_options(void); void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); +void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); void RC4(RC4_KEY *key, size_t len, const unsigned char *indata, unsigned char *outdata); diff --git a/app/openssl/crypto/rc4/rc4_skey.c b/app/openssl/crypto/rc4/rc4_skey.c index b22c40b0..fda27636 100644 --- a/app/openssl/crypto/rc4/rc4_skey.c +++ b/app/openssl/crypto/rc4/rc4_skey.c @@ -85,7 +85,7 @@ const char *RC4_options(void) * Date: Wed, 14 Sep 1994 06:35:31 GMT */ -void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) +void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) { register RC4_INT tmp; register int id1,id2; @@ -104,40 +104,6 @@ void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) d[(n)]=d[id2]; \ d[id2]=tmp; } -#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) -# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - defined(__INTEL__) || \ - defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) - if (sizeof(RC4_INT) > 1) { - /* - * Unlike all other x86 [and x86_64] implementations, - * Intel P4 core [including EM64T] was found to perform - * poorly with wider RC4_INT. Performance improvement - * for IA-32 hand-coded assembler turned out to be 2.8x - * if re-coded for RC4_CHAR! It's however inappropriate - * to just switch to RC4_CHAR for x86[_64], as non-P4 - * implementations suffer from significant performance - * losses then, e.g. PIII exhibits >2x deterioration, - * and so does Opteron. In order to assure optimal - * all-round performance, let us [try to] detect P4 at - * run-time by checking upon HTT bit in CPU capability - * vector and set up compressed key schedule, which is - * recognized by correspondingly updated assembler - * module... - * - */ - if (OPENSSL_ia32cap_P & (1<<28)) { - unsigned char *cp=(unsigned char *)d; - - for (i=0;i<256;i++) cp[i]=i; - for (i=0;i<256;i++) SK_LOOP(cp,i); - /* mark schedule as compressed! */ - d[256/sizeof(RC4_INT)]=-1; - return; - } - } -# endif -#endif for (i=0; i < 256; i++) d[i]=i; for (i=0; i < 256; i+=4) { diff --git a/app/openssl/crypto/rc4/rc4_utl.c b/app/openssl/crypto/rc4/rc4_utl.c new file mode 100644 index 00000000..ab3f02fe --- /dev/null +++ b/app/openssl/crypto/rc4/rc4_utl.c @@ -0,0 +1,62 @@ +/* crypto/rc4/rc4_utl.c -*- mode:C; c-file-style: "eay" -*- */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include +#include + +void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) + { +#ifdef OPENSSL_FIPS + fips_cipher_abort(RC4); +#endif + private_RC4_set_key(key, len, data); + } diff --git a/app/openssl/crypto/rc4/rc4test.c b/app/openssl/crypto/rc4/rc4test.c index 633a79e7..4312605c 100644 --- a/app/openssl/crypto/rc4/rc4test.c +++ b/app/openssl/crypto/rc4/rc4test.c @@ -120,6 +120,12 @@ int main(int argc, char *argv[]) RC4_KEY key; unsigned char obuf[512]; +#if !defined(OPENSSL_PIC) + void OPENSSL_cpuid_setup(void); + + OPENSSL_cpuid_setup(); +#endif + for (i=0; i<6; i++) { RC4_set_key(&key,keys[i][0],&(keys[i][1])); diff --git a/app/openssl/crypto/ripemd/ripemd.h b/app/openssl/crypto/ripemd/ripemd.h index 5942eb61..189bd8c9 100644 --- a/app/openssl/crypto/ripemd/ripemd.h +++ b/app/openssl/crypto/ripemd/ripemd.h @@ -91,6 +91,9 @@ typedef struct RIPEMD160state_st unsigned int num; } RIPEMD160_CTX; +#ifdef OPENSSL_FIPS +int private_RIPEMD160_Init(RIPEMD160_CTX *c); +#endif int RIPEMD160_Init(RIPEMD160_CTX *c); int RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len); int RIPEMD160_Final(unsigned char *md, RIPEMD160_CTX *c); diff --git a/app/openssl/crypto/ripemd/rmd_dgst.c b/app/openssl/crypto/ripemd/rmd_dgst.c index 59b017f8..d8e72da5 100644 --- a/app/openssl/crypto/ripemd/rmd_dgst.c +++ b/app/openssl/crypto/ripemd/rmd_dgst.c @@ -59,6 +59,7 @@ #include #include "rmd_locl.h" #include +#include const char RMD160_version[]="RIPE-MD160" OPENSSL_VERSION_PTEXT; @@ -69,7 +70,7 @@ const char RMD160_version[]="RIPE-MD160" OPENSSL_VERSION_PTEXT; void ripemd160_block(RIPEMD160_CTX *c, unsigned long *p,size_t num); # endif -int RIPEMD160_Init(RIPEMD160_CTX *c) +fips_md_init(RIPEMD160) { memset (c,0,sizeof(*c)); c->A=RIPEMD160_A; @@ -104,21 +105,21 @@ void ripemd160_block_data_order (RIPEMD160_CTX *ctx, const void *p, size_t num) A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E; - HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; - RIP1(A,B,C,D,E,WL00,SL00); HOST_c2l(data,l); X( 2)=l; - RIP1(E,A,B,C,D,WL01,SL01); HOST_c2l(data,l); X( 3)=l; - RIP1(D,E,A,B,C,WL02,SL02); HOST_c2l(data,l); X( 4)=l; - RIP1(C,D,E,A,B,WL03,SL03); HOST_c2l(data,l); X( 5)=l; - RIP1(B,C,D,E,A,WL04,SL04); HOST_c2l(data,l); X( 6)=l; - RIP1(A,B,C,D,E,WL05,SL05); HOST_c2l(data,l); X( 7)=l; - RIP1(E,A,B,C,D,WL06,SL06); HOST_c2l(data,l); X( 8)=l; - RIP1(D,E,A,B,C,WL07,SL07); HOST_c2l(data,l); X( 9)=l; - RIP1(C,D,E,A,B,WL08,SL08); HOST_c2l(data,l); X(10)=l; - RIP1(B,C,D,E,A,WL09,SL09); HOST_c2l(data,l); X(11)=l; - RIP1(A,B,C,D,E,WL10,SL10); HOST_c2l(data,l); X(12)=l; - RIP1(E,A,B,C,D,WL11,SL11); HOST_c2l(data,l); X(13)=l; - RIP1(D,E,A,B,C,WL12,SL12); HOST_c2l(data,l); X(14)=l; - RIP1(C,D,E,A,B,WL13,SL13); HOST_c2l(data,l); X(15)=l; + (void)HOST_c2l(data,l); X( 0)=l;(void)HOST_c2l(data,l); X( 1)=l; + RIP1(A,B,C,D,E,WL00,SL00); (void)HOST_c2l(data,l); X( 2)=l; + RIP1(E,A,B,C,D,WL01,SL01); (void)HOST_c2l(data,l); X( 3)=l; + RIP1(D,E,A,B,C,WL02,SL02); (void)HOST_c2l(data,l); X( 4)=l; + RIP1(C,D,E,A,B,WL03,SL03); (void)HOST_c2l(data,l); X( 5)=l; + RIP1(B,C,D,E,A,WL04,SL04); (void)HOST_c2l(data,l); X( 6)=l; + RIP1(A,B,C,D,E,WL05,SL05); (void)HOST_c2l(data,l); X( 7)=l; + RIP1(E,A,B,C,D,WL06,SL06); (void)HOST_c2l(data,l); X( 8)=l; + RIP1(D,E,A,B,C,WL07,SL07); (void)HOST_c2l(data,l); X( 9)=l; + RIP1(C,D,E,A,B,WL08,SL08); (void)HOST_c2l(data,l); X(10)=l; + RIP1(B,C,D,E,A,WL09,SL09); (void)HOST_c2l(data,l); X(11)=l; + RIP1(A,B,C,D,E,WL10,SL10); (void)HOST_c2l(data,l); X(12)=l; + RIP1(E,A,B,C,D,WL11,SL11); (void)HOST_c2l(data,l); X(13)=l; + RIP1(D,E,A,B,C,WL12,SL12); (void)HOST_c2l(data,l); X(14)=l; + RIP1(C,D,E,A,B,WL13,SL13); (void)HOST_c2l(data,l); X(15)=l; RIP1(B,C,D,E,A,WL14,SL14); RIP1(A,B,C,D,E,WL15,SL15); diff --git a/app/openssl/crypto/ripemd/rmd_locl.h b/app/openssl/crypto/ripemd/rmd_locl.h index f14b346e..2bd8957d 100644 --- a/app/openssl/crypto/ripemd/rmd_locl.h +++ b/app/openssl/crypto/ripemd/rmd_locl.h @@ -88,11 +88,11 @@ void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num); #define HASH_FINAL RIPEMD160_Final #define HASH_MAKE_STRING(c,s) do { \ unsigned long ll; \ - ll=(c)->A; HOST_l2c(ll,(s)); \ - ll=(c)->B; HOST_l2c(ll,(s)); \ - ll=(c)->C; HOST_l2c(ll,(s)); \ - ll=(c)->D; HOST_l2c(ll,(s)); \ - ll=(c)->E; HOST_l2c(ll,(s)); \ + ll=(c)->A; (void)HOST_l2c(ll,(s)); \ + ll=(c)->B; (void)HOST_l2c(ll,(s)); \ + ll=(c)->C; (void)HOST_l2c(ll,(s)); \ + ll=(c)->D; (void)HOST_l2c(ll,(s)); \ + ll=(c)->E; (void)HOST_l2c(ll,(s)); \ } while (0) #define HASH_BLOCK_DATA_ORDER ripemd160_block_data_order diff --git a/app/openssl/crypto/rsa/rsa.h b/app/openssl/crypto/rsa/rsa.h index cf743436..5f269e57 100644 --- a/app/openssl/crypto/rsa/rsa.h +++ b/app/openssl/crypto/rsa/rsa.h @@ -222,12 +222,22 @@ struct rsa_st EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, -1, EVP_PKEY_CTRL_RSA_PADDING, \ pad, NULL) +#define EVP_PKEY_CTX_get_rsa_padding(ctx, ppad) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, -1, \ + EVP_PKEY_CTRL_GET_RSA_PADDING, 0, ppad) + #define EVP_PKEY_CTX_set_rsa_pss_saltlen(ctx, len) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ (EVP_PKEY_OP_SIGN|EVP_PKEY_OP_VERIFY), \ EVP_PKEY_CTRL_RSA_PSS_SALTLEN, \ len, NULL) +#define EVP_PKEY_CTX_get_rsa_pss_saltlen(ctx, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ + (EVP_PKEY_OP_SIGN|EVP_PKEY_OP_VERIFY), \ + EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN, \ + 0, plen) + #define EVP_PKEY_CTX_set_rsa_keygen_bits(ctx, bits) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_KEYGEN, \ EVP_PKEY_CTRL_RSA_KEYGEN_BITS, bits, NULL) @@ -236,11 +246,24 @@ struct rsa_st EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_KEYGEN, \ EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP, 0, pubexp) +#define EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTRL_RSA_MGF1_MD, 0, (void *)md) + +#define EVP_PKEY_CTX_get_rsa_mgf1_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTRL_GET_RSA_MGF1_MD, 0, (void *)pmd) + #define EVP_PKEY_CTRL_RSA_PADDING (EVP_PKEY_ALG_CTRL + 1) #define EVP_PKEY_CTRL_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 2) #define EVP_PKEY_CTRL_RSA_KEYGEN_BITS (EVP_PKEY_ALG_CTRL + 3) #define EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP (EVP_PKEY_ALG_CTRL + 4) +#define EVP_PKEY_CTRL_RSA_MGF1_MD (EVP_PKEY_ALG_CTRL + 5) + +#define EVP_PKEY_CTRL_GET_RSA_PADDING (EVP_PKEY_ALG_CTRL + 6) +#define EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 7) +#define EVP_PKEY_CTRL_GET_RSA_MGF1_MD (EVP_PKEY_ALG_CTRL + 8) #define RSA_PKCS1_PADDING 1 #define RSA_SSLV23_PADDING 2 @@ -257,7 +280,7 @@ struct rsa_st RSA * RSA_new(void); RSA * RSA_new_method(ENGINE *engine); -int RSA_size(const RSA *); +int RSA_size(const RSA *rsa); /* Deprecated version */ #ifndef OPENSSL_NO_DEPRECATED @@ -300,6 +323,16 @@ const RSA_METHOD *RSA_null_method(void); DECLARE_ASN1_ENCODE_FUNCTIONS_const(RSA, RSAPublicKey) DECLARE_ASN1_ENCODE_FUNCTIONS_const(RSA, RSAPrivateKey) +typedef struct rsa_pss_params_st + { + X509_ALGOR *hashAlgorithm; + X509_ALGOR *maskGenAlgorithm; + ASN1_INTEGER *saltLength; + ASN1_INTEGER *trailerField; + } RSA_PSS_PARAMS; + +DECLARE_ASN1_FUNCTIONS(RSA_PSS_PARAMS) + #ifndef OPENSSL_NO_FP_API int RSA_print_fp(FILE *fp, const RSA *r,int offset); #endif @@ -380,6 +413,14 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, const unsigned char *mHash, const EVP_MD *Hash, int sLen); +int RSA_verify_PKCS1_PSS_mgf1(RSA *rsa, const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, + const unsigned char *EM, int sLen); + +int RSA_padding_add_PKCS1_PSS_mgf1(RSA *rsa, unsigned char *EM, + const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, int sLen); + int RSA_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, CRYPTO_EX_free *free_func); int RSA_set_ex_data(RSA *r,int idx,void *arg); @@ -388,6 +429,25 @@ void *RSA_get_ex_data(const RSA *r, int idx); RSA *RSAPublicKey_dup(RSA *rsa); RSA *RSAPrivateKey_dup(RSA *rsa); +/* If this flag is set the RSA method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its responsibility + * to ensure the result is compliant. + */ + +#define RSA_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define RSA_FLAG_NON_FIPS_ALLOW 0x0400 +/* Application has decided PRNG is good enough to generate a key: don't + * check. + */ +#define RSA_FLAG_CHECKED 0x0800 + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -405,6 +465,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_PKEY_RSA_CTRL 143 #define RSA_F_PKEY_RSA_CTRL_STR 144 #define RSA_F_PKEY_RSA_SIGN 142 +#define RSA_F_PKEY_RSA_VERIFY 154 #define RSA_F_PKEY_RSA_VERIFYRECOVER 141 #define RSA_F_RSA_BUILTIN_KEYGEN 129 #define RSA_F_RSA_CHECK_KEY 123 @@ -413,6 +474,8 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_EAY_PUBLIC_DECRYPT 103 #define RSA_F_RSA_EAY_PUBLIC_ENCRYPT 104 #define RSA_F_RSA_GENERATE_KEY 105 +#define RSA_F_RSA_GENERATE_KEY_EX 155 +#define RSA_F_RSA_ITEM_VERIFY 156 #define RSA_F_RSA_MEMORY_LOCK 130 #define RSA_F_RSA_NEW_METHOD 106 #define RSA_F_RSA_NULL 124 @@ -424,6 +487,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_PADDING_ADD_NONE 107 #define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP 121 #define RSA_F_RSA_PADDING_ADD_PKCS1_PSS 125 +#define RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1 148 #define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_1 108 #define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_2 109 #define RSA_F_RSA_PADDING_ADD_SSLV23 110 @@ -436,8 +500,12 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_PADDING_CHECK_X931 128 #define RSA_F_RSA_PRINT 115 #define RSA_F_RSA_PRINT_FP 116 +#define RSA_F_RSA_PRIVATE_DECRYPT 150 +#define RSA_F_RSA_PRIVATE_ENCRYPT 151 #define RSA_F_RSA_PRIV_DECODE 137 #define RSA_F_RSA_PRIV_ENCODE 138 +#define RSA_F_RSA_PUBLIC_DECRYPT 152 +#define RSA_F_RSA_PUBLIC_ENCRYPT 153 #define RSA_F_RSA_PUB_DECODE 139 #define RSA_F_RSA_SETUP_BLINDING 136 #define RSA_F_RSA_SIGN 117 @@ -445,6 +513,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_VERIFY 119 #define RSA_F_RSA_VERIFY_ASN1_OCTET_STRING 120 #define RSA_F_RSA_VERIFY_PKCS1_PSS 126 +#define RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1 149 /* Reason codes. */ #define RSA_R_ALGORITHM_MISMATCH 100 @@ -470,19 +539,24 @@ void ERR_load_RSA_strings(void); #define RSA_R_INVALID_HEADER 137 #define RSA_R_INVALID_KEYBITS 145 #define RSA_R_INVALID_MESSAGE_LENGTH 131 +#define RSA_R_INVALID_MGF1_MD 156 #define RSA_R_INVALID_PADDING 138 #define RSA_R_INVALID_PADDING_MODE 141 +#define RSA_R_INVALID_PSS_PARAMETERS 149 #define RSA_R_INVALID_PSS_SALTLEN 146 +#define RSA_R_INVALID_SALT_LENGTH 150 #define RSA_R_INVALID_TRAILER 139 #define RSA_R_INVALID_X931_DIGEST 142 #define RSA_R_IQMP_NOT_INVERSE_OF_Q 126 #define RSA_R_KEY_SIZE_TOO_SMALL 120 #define RSA_R_LAST_OCTET_INVALID 134 #define RSA_R_MODULUS_TOO_LARGE 105 +#define RSA_R_NON_FIPS_RSA_METHOD 157 #define RSA_R_NO_PUBLIC_EXPONENT 140 #define RSA_R_NULL_BEFORE_BLOCK_MISSING 113 #define RSA_R_N_DOES_NOT_EQUAL_P_Q 127 #define RSA_R_OAEP_DECODING_ERROR 121 +#define RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE 158 #define RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 148 #define RSA_R_PADDING_CHECK_FAILED 114 #define RSA_R_P_NOT_PRIME 128 @@ -493,7 +567,12 @@ void ERR_load_RSA_strings(void); #define RSA_R_SSLV3_ROLLBACK_ATTACK 115 #define RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD 116 #define RSA_R_UNKNOWN_ALGORITHM_TYPE 117 +#define RSA_R_UNKNOWN_MASK_DIGEST 151 #define RSA_R_UNKNOWN_PADDING_TYPE 118 +#define RSA_R_UNKNOWN_PSS_DIGEST 152 +#define RSA_R_UNSUPPORTED_MASK_ALGORITHM 153 +#define RSA_R_UNSUPPORTED_MASK_PARAMETER 154 +#define RSA_R_UNSUPPORTED_SIGNATURE_TYPE 155 #define RSA_R_VALUE_MISSING 147 #define RSA_R_WRONG_SIGNATURE_LENGTH 119 diff --git a/app/openssl/crypto/rsa/rsa_ameth.c b/app/openssl/crypto/rsa/rsa_ameth.c index 8c320988..5a2062f9 100644 --- a/app/openssl/crypto/rsa/rsa_ameth.c +++ b/app/openssl/crypto/rsa/rsa_ameth.c @@ -265,6 +265,147 @@ static int rsa_priv_print(BIO *bp, const EVP_PKEY *pkey, int indent, return do_rsa_print(bp, pkey->pkey.rsa, indent, 1); } +static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg, + X509_ALGOR **pmaskHash) + { + const unsigned char *p; + int plen; + RSA_PSS_PARAMS *pss; + + *pmaskHash = NULL; + + if (!alg->parameter || alg->parameter->type != V_ASN1_SEQUENCE) + return NULL; + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + pss = d2i_RSA_PSS_PARAMS(NULL, &p, plen); + + if (!pss) + return NULL; + + if (pss->maskGenAlgorithm) + { + ASN1_TYPE *param = pss->maskGenAlgorithm->parameter; + if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) == NID_mgf1 + && param->type == V_ASN1_SEQUENCE) + { + p = param->value.sequence->data; + plen = param->value.sequence->length; + *pmaskHash = d2i_X509_ALGOR(NULL, &p, plen); + } + } + + return pss; + } + +static int rsa_pss_param_print(BIO *bp, RSA_PSS_PARAMS *pss, + X509_ALGOR *maskHash, int indent) + { + int rv = 0; + if (!pss) + { + if (BIO_puts(bp, " (INVALID PSS PARAMETERS)\n") <= 0) + return 0; + return 1; + } + if (BIO_puts(bp, "\n") <= 0) + goto err; + if (!BIO_indent(bp, indent, 128)) + goto err; + if (BIO_puts(bp, "Hash Algorithm: ") <= 0) + goto err; + + if (pss->hashAlgorithm) + { + if (i2a_ASN1_OBJECT(bp, pss->hashAlgorithm->algorithm) <= 0) + goto err; + } + else if (BIO_puts(bp, "sha1 (default)") <= 0) + goto err; + + if (BIO_puts(bp, "\n") <= 0) + goto err; + + if (!BIO_indent(bp, indent, 128)) + goto err; + + if (BIO_puts(bp, "Mask Algorithm: ") <= 0) + goto err; + if (pss->maskGenAlgorithm) + { + if (i2a_ASN1_OBJECT(bp, pss->maskGenAlgorithm->algorithm) <= 0) + goto err; + if (BIO_puts(bp, " with ") <= 0) + goto err; + if (maskHash) + { + if (i2a_ASN1_OBJECT(bp, maskHash->algorithm) <= 0) + goto err; + } + else if (BIO_puts(bp, "INVALID") <= 0) + goto err; + } + else if (BIO_puts(bp, "mgf1 with sha1 (default)") <= 0) + goto err; + BIO_puts(bp, "\n"); + + if (!BIO_indent(bp, indent, 128)) + goto err; + if (BIO_puts(bp, "Salt Length: 0x") <= 0) + goto err; + if (pss->saltLength) + { + if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0) + goto err; + } + else if (BIO_puts(bp, "0x14 (default)") <= 0) + goto err; + BIO_puts(bp, "\n"); + + if (!BIO_indent(bp, indent, 128)) + goto err; + if (BIO_puts(bp, "Trailer Field: 0x") <= 0) + goto err; + if (pss->trailerField) + { + if (i2a_ASN1_INTEGER(bp, pss->trailerField) <= 0) + goto err; + } + else if (BIO_puts(bp, "BC (default)") <= 0) + goto err; + BIO_puts(bp, "\n"); + + rv = 1; + + err: + return rv; + + } + +static int rsa_sig_print(BIO *bp, const X509_ALGOR *sigalg, + const ASN1_STRING *sig, + int indent, ASN1_PCTX *pctx) + { + if (OBJ_obj2nid(sigalg->algorithm) == NID_rsassaPss) + { + int rv; + RSA_PSS_PARAMS *pss; + X509_ALGOR *maskHash; + pss = rsa_pss_decode(sigalg, &maskHash); + rv = rsa_pss_param_print(bp, pss, maskHash, indent); + if (pss) + RSA_PSS_PARAMS_free(pss); + if (maskHash) + X509_ALGOR_free(maskHash); + if (!rv) + return 0; + } + else if (!sig && BIO_puts(bp, "\n") <= 0) + return 0; + if (sig) + return X509_signature_dump(bp, sig, indent); + return 1; + } static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) { @@ -310,6 +451,211 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) } +/* Customised RSA item verification routine. This is called + * when a signature is encountered requiring special handling. We + * currently only handle PSS. + */ + + +static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *sigalg, ASN1_BIT_STRING *sig, + EVP_PKEY *pkey) + { + int rv = -1; + int saltlen; + const EVP_MD *mgf1md = NULL, *md = NULL; + RSA_PSS_PARAMS *pss; + X509_ALGOR *maskHash; + EVP_PKEY_CTX *pkctx; + /* Sanity check: make sure it is PSS */ + if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); + return -1; + } + /* Decode PSS parameters */ + pss = rsa_pss_decode(sigalg, &maskHash); + + if (pss == NULL) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_PSS_PARAMETERS); + goto err; + } + /* Check mask and lookup mask hash algorithm */ + if (pss->maskGenAlgorithm) + { + if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_ALGORITHM); + goto err; + } + if (!maskHash) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_PARAMETER); + goto err; + } + mgf1md = EVP_get_digestbyobj(maskHash->algorithm); + if (mgf1md == NULL) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_MASK_DIGEST); + goto err; + } + } + else + mgf1md = EVP_sha1(); + + if (pss->hashAlgorithm) + { + md = EVP_get_digestbyobj(pss->hashAlgorithm->algorithm); + if (md == NULL) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_PSS_DIGEST); + goto err; + } + } + else + md = EVP_sha1(); + + if (pss->saltLength) + { + saltlen = ASN1_INTEGER_get(pss->saltLength); + + /* Could perform more salt length sanity checks but the main + * RSA routines will trap other invalid values anyway. + */ + if (saltlen < 0) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_SALT_LENGTH); + goto err; + } + } + else + saltlen = 20; + + /* low-level routines support only trailer field 0xbc (value 1) + * and PKCS#1 says we should reject any other value anyway. + */ + if (pss->trailerField && ASN1_INTEGER_get(pss->trailerField) != 1) + { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_TRAILER); + goto err; + } + + /* We have all parameters now set up context */ + + if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey)) + goto err; + + if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0) + goto err; + + if (EVP_PKEY_CTX_set_rsa_pss_saltlen(pkctx, saltlen) <= 0) + goto err; + + if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0) + goto err; + /* Carry on */ + rv = 2; + + err: + RSA_PSS_PARAMS_free(pss); + if (maskHash) + X509_ALGOR_free(maskHash); + return rv; + } + +static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *alg1, X509_ALGOR *alg2, + ASN1_BIT_STRING *sig) + { + int pad_mode; + EVP_PKEY_CTX *pkctx = ctx->pctx; + if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0) + return 0; + if (pad_mode == RSA_PKCS1_PADDING) + return 2; + if (pad_mode == RSA_PKCS1_PSS_PADDING) + { + const EVP_MD *sigmd, *mgf1md; + RSA_PSS_PARAMS *pss = NULL; + X509_ALGOR *mgf1alg = NULL; + ASN1_STRING *os1 = NULL, *os2 = NULL; + EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx); + int saltlen, rv = 0; + sigmd = EVP_MD_CTX_md(ctx); + if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) + goto err; + if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen)) + goto err; + if (saltlen == -1) + saltlen = EVP_MD_size(sigmd); + else if (saltlen == -2) + { + saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2; + if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) + saltlen--; + } + pss = RSA_PSS_PARAMS_new(); + if (!pss) + goto err; + if (saltlen != 20) + { + pss->saltLength = ASN1_INTEGER_new(); + if (!pss->saltLength) + goto err; + if (!ASN1_INTEGER_set(pss->saltLength, saltlen)) + goto err; + } + if (EVP_MD_type(sigmd) != NID_sha1) + { + pss->hashAlgorithm = X509_ALGOR_new(); + if (!pss->hashAlgorithm) + goto err; + X509_ALGOR_set_md(pss->hashAlgorithm, sigmd); + } + if (EVP_MD_type(mgf1md) != NID_sha1) + { + ASN1_STRING *stmp = NULL; + /* need to embed algorithm ID inside another */ + mgf1alg = X509_ALGOR_new(); + X509_ALGOR_set_md(mgf1alg, mgf1md); + if (!ASN1_item_pack(mgf1alg, ASN1_ITEM_rptr(X509_ALGOR), + &stmp)) + goto err; + pss->maskGenAlgorithm = X509_ALGOR_new(); + if (!pss->maskGenAlgorithm) + goto err; + X509_ALGOR_set0(pss->maskGenAlgorithm, + OBJ_nid2obj(NID_mgf1), + V_ASN1_SEQUENCE, stmp); + } + /* Finally create string with pss parameter encoding. */ + if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os1)) + goto err; + if (alg2) + { + os2 = ASN1_STRING_dup(os1); + if (!os2) + goto err; + X509_ALGOR_set0(alg2, OBJ_nid2obj(NID_rsassaPss), + V_ASN1_SEQUENCE, os2); + } + X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_rsassaPss), + V_ASN1_SEQUENCE, os1); + os1 = os2 = NULL; + rv = 3; + err: + if (mgf1alg) + X509_ALGOR_free(mgf1alg); + if (pss) + RSA_PSS_PARAMS_free(pss); + if (os1) + ASN1_STRING_free(os1); + return rv; + + } + return 2; + } const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = { @@ -335,10 +681,13 @@ const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = 0,0,0,0,0,0, + rsa_sig_print, int_rsa_free, rsa_pkey_ctrl, old_rsa_priv_decode, - old_rsa_priv_encode + old_rsa_priv_encode, + rsa_item_verify, + rsa_item_sign }, { diff --git a/app/openssl/crypto/rsa/rsa_asn1.c b/app/openssl/crypto/rsa/rsa_asn1.c index 4efca8cd..6ed5de3d 100644 --- a/app/openssl/crypto/rsa/rsa_asn1.c +++ b/app/openssl/crypto/rsa/rsa_asn1.c @@ -60,6 +60,7 @@ #include "cryptlib.h" #include #include +#include #include /* Override the default free and new methods */ @@ -96,6 +97,15 @@ ASN1_SEQUENCE_cb(RSAPublicKey, rsa_cb) = { ASN1_SIMPLE(RSA, e, BIGNUM), } ASN1_SEQUENCE_END_cb(RSA, RSAPublicKey) +ASN1_SEQUENCE(RSA_PSS_PARAMS) = { + ASN1_EXP_OPT(RSA_PSS_PARAMS, hashAlgorithm, X509_ALGOR,0), + ASN1_EXP_OPT(RSA_PSS_PARAMS, maskGenAlgorithm, X509_ALGOR,1), + ASN1_EXP_OPT(RSA_PSS_PARAMS, saltLength, ASN1_INTEGER,2), + ASN1_EXP_OPT(RSA_PSS_PARAMS, trailerField, ASN1_INTEGER,3) +} ASN1_SEQUENCE_END(RSA_PSS_PARAMS) + +IMPLEMENT_ASN1_FUNCTIONS(RSA_PSS_PARAMS) + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPrivateKey, RSAPrivateKey) IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPublicKey, RSAPublicKey) diff --git a/app/openssl/crypto/rsa/rsa_chk.c b/app/openssl/crypto/rsa/rsa_chk.c index 9d848db8..cc30e771 100644 --- a/app/openssl/crypto/rsa/rsa_chk.c +++ b/app/openssl/crypto/rsa/rsa_chk.c @@ -59,6 +59,12 @@ int RSA_check_key(const RSA *key) BN_CTX *ctx; int r; int ret=1; + + if (!key->p || !key->q || !key->n || !key->e || !key->d) + { + RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_VALUE_MISSING); + return 0; + } i = BN_new(); j = BN_new(); diff --git a/app/openssl/crypto/rsa/rsa_crpt.c b/app/openssl/crypto/rsa/rsa_crpt.c new file mode 100644 index 00000000..d3e44785 --- /dev/null +++ b/app/openssl/crypto/rsa/rsa_crpt.c @@ -0,0 +1,257 @@ +/* crypto/rsa/rsa_lib.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include +#include "cryptlib.h" +#include +#include +#include +#include +#ifndef OPENSSL_NO_ENGINE +#include +#endif + +int RSA_size(const RSA *r) + { + return(BN_num_bytes(r->n)); + } + +int RSA_public_encrypt(int flen, const unsigned char *from, unsigned char *to, + RSA *rsa, int padding) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_PUBLIC_ENCRYPT, RSA_R_NON_FIPS_RSA_METHOD); + return -1; + } +#endif + return(rsa->meth->rsa_pub_enc(flen, from, to, rsa, padding)); + } + +int RSA_private_encrypt(int flen, const unsigned char *from, unsigned char *to, + RSA *rsa, int padding) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_PRIVATE_ENCRYPT, RSA_R_NON_FIPS_RSA_METHOD); + return -1; + } +#endif + return(rsa->meth->rsa_priv_enc(flen, from, to, rsa, padding)); + } + +int RSA_private_decrypt(int flen, const unsigned char *from, unsigned char *to, + RSA *rsa, int padding) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_PRIVATE_DECRYPT, RSA_R_NON_FIPS_RSA_METHOD); + return -1; + } +#endif + return(rsa->meth->rsa_priv_dec(flen, from, to, rsa, padding)); + } + +int RSA_public_decrypt(int flen, const unsigned char *from, unsigned char *to, + RSA *rsa, int padding) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_PUBLIC_DECRYPT, RSA_R_NON_FIPS_RSA_METHOD); + return -1; + } +#endif + return(rsa->meth->rsa_pub_dec(flen, from, to, rsa, padding)); + } + +int RSA_flags(const RSA *r) + { + return((r == NULL)?0:r->meth->flags); + } + +void RSA_blinding_off(RSA *rsa) + { + if (rsa->blinding != NULL) + { + BN_BLINDING_free(rsa->blinding); + rsa->blinding=NULL; + } + rsa->flags &= ~RSA_FLAG_BLINDING; + rsa->flags |= RSA_FLAG_NO_BLINDING; + } + +int RSA_blinding_on(RSA *rsa, BN_CTX *ctx) + { + int ret=0; + + if (rsa->blinding != NULL) + RSA_blinding_off(rsa); + + rsa->blinding = RSA_setup_blinding(rsa, ctx); + if (rsa->blinding == NULL) + goto err; + + rsa->flags |= RSA_FLAG_BLINDING; + rsa->flags &= ~RSA_FLAG_NO_BLINDING; + ret=1; +err: + return(ret); + } + +static BIGNUM *rsa_get_public_exp(const BIGNUM *d, const BIGNUM *p, + const BIGNUM *q, BN_CTX *ctx) +{ + BIGNUM *ret = NULL, *r0, *r1, *r2; + + if (d == NULL || p == NULL || q == NULL) + return NULL; + + BN_CTX_start(ctx); + r0 = BN_CTX_get(ctx); + r1 = BN_CTX_get(ctx); + r2 = BN_CTX_get(ctx); + if (r2 == NULL) + goto err; + + if (!BN_sub(r1, p, BN_value_one())) goto err; + if (!BN_sub(r2, q, BN_value_one())) goto err; + if (!BN_mul(r0, r1, r2, ctx)) goto err; + + ret = BN_mod_inverse(NULL, d, r0, ctx); +err: + BN_CTX_end(ctx); + return ret; +} + +BN_BLINDING *RSA_setup_blinding(RSA *rsa, BN_CTX *in_ctx) +{ + BIGNUM local_n; + BIGNUM *e,*n; + BN_CTX *ctx; + BN_BLINDING *ret = NULL; + + if (in_ctx == NULL) + { + if ((ctx = BN_CTX_new()) == NULL) return 0; + } + else + ctx = in_ctx; + + BN_CTX_start(ctx); + e = BN_CTX_get(ctx); + if (e == NULL) + { + RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_MALLOC_FAILURE); + goto err; + } + + if (rsa->e == NULL) + { + e = rsa_get_public_exp(rsa->d, rsa->p, rsa->q, ctx); + if (e == NULL) + { + RSAerr(RSA_F_RSA_SETUP_BLINDING, RSA_R_NO_PUBLIC_EXPONENT); + goto err; + } + } + else + e = rsa->e; + + + if ((RAND_status() == 0) && rsa->d != NULL && rsa->d->d != NULL) + { + /* if PRNG is not properly seeded, resort to secret + * exponent as unpredictable seed */ + RAND_add(rsa->d->d, rsa->d->dmax * sizeof rsa->d->d[0], 0.0); + } + + if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) + { + /* Set BN_FLG_CONSTTIME flag */ + n = &local_n; + BN_with_flags(n, rsa->n, BN_FLG_CONSTTIME); + } + else + n = rsa->n; + + ret = BN_BLINDING_create_param(NULL, e, n, ctx, + rsa->meth->bn_mod_exp, rsa->_method_mod_n); + if (ret == NULL) + { + RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_BN_LIB); + goto err; + } + CRYPTO_THREADID_current(BN_BLINDING_thread_id(ret)); +err: + BN_CTX_end(ctx); + if (in_ctx == NULL) + BN_CTX_free(ctx); + if(rsa->e == NULL) + BN_free(e); + + return ret; +} diff --git a/app/openssl/crypto/rsa/rsa_eay.c b/app/openssl/crypto/rsa/rsa_eay.c index 7c941885..88ee2cb5 100644 --- a/app/openssl/crypto/rsa/rsa_eay.c +++ b/app/openssl/crypto/rsa/rsa_eay.c @@ -314,51 +314,56 @@ static BN_BLINDING *rsa_get_blinding(RSA *rsa, int *local, BN_CTX *ctx) return ret; } -static int rsa_blinding_convert(BN_BLINDING *b, int local, BIGNUM *f, - BIGNUM *r, BN_CTX *ctx) -{ - if (local) +static int rsa_blinding_convert(BN_BLINDING *b, BIGNUM *f, BIGNUM *unblind, + BN_CTX *ctx) + { + if (unblind == NULL) + /* Local blinding: store the unblinding factor + * in BN_BLINDING. */ return BN_BLINDING_convert_ex(f, NULL, b, ctx); else { - int ret; - CRYPTO_r_lock(CRYPTO_LOCK_RSA_BLINDING); - ret = BN_BLINDING_convert_ex(f, r, b, ctx); - CRYPTO_r_unlock(CRYPTO_LOCK_RSA_BLINDING); - return ret; - } -} - -static int rsa_blinding_invert(BN_BLINDING *b, int local, BIGNUM *f, - BIGNUM *r, BN_CTX *ctx) -{ - if (local) - return BN_BLINDING_invert_ex(f, NULL, b, ctx); - else - { + /* Shared blinding: store the unblinding factor + * outside BN_BLINDING. */ int ret; CRYPTO_w_lock(CRYPTO_LOCK_RSA_BLINDING); - ret = BN_BLINDING_invert_ex(f, r, b, ctx); + ret = BN_BLINDING_convert_ex(f, unblind, b, ctx); CRYPTO_w_unlock(CRYPTO_LOCK_RSA_BLINDING); return ret; } -} + } + +static int rsa_blinding_invert(BN_BLINDING *b, BIGNUM *f, BIGNUM *unblind, + BN_CTX *ctx) + { + /* For local blinding, unblind is set to NULL, and BN_BLINDING_invert_ex + * will use the unblinding factor stored in BN_BLINDING. + * If BN_BLINDING is shared between threads, unblind must be non-null: + * BN_BLINDING_invert_ex will then use the local unblinding factor, + * and will only read the modulus from BN_BLINDING. + * In both cases it's safe to access the blinding without a lock. + */ + return BN_BLINDING_invert_ex(f, unblind, b, ctx); + } /* signing */ static int RSA_eay_private_encrypt(int flen, const unsigned char *from, unsigned char *to, RSA *rsa, int padding) { - BIGNUM *f, *ret, *br, *res; + BIGNUM *f, *ret, *res; int i,j,k,num=0,r= -1; unsigned char *buf=NULL; BN_CTX *ctx=NULL; int local_blinding = 0; + /* Used only if the blinding structure is shared. A non-NULL unblind + * instructs rsa_blinding_convert() and rsa_blinding_invert() to store + * the unblinding factor outside the blinding structure. */ + BIGNUM *unblind = NULL; BN_BLINDING *blinding = NULL; if ((ctx=BN_CTX_new()) == NULL) goto err; BN_CTX_start(ctx); f = BN_CTX_get(ctx); - br = BN_CTX_get(ctx); ret = BN_CTX_get(ctx); num = BN_num_bytes(rsa->n); buf = OPENSSL_malloc(num); @@ -406,8 +411,15 @@ static int RSA_eay_private_encrypt(int flen, const unsigned char *from, } if (blinding != NULL) - if (!rsa_blinding_convert(blinding, local_blinding, f, br, ctx)) + { + if (!local_blinding && ((unblind = BN_CTX_get(ctx)) == NULL)) + { + RSAerr(RSA_F_RSA_EAY_PRIVATE_ENCRYPT,ERR_R_MALLOC_FAILURE); + goto err; + } + if (!rsa_blinding_convert(blinding, f, unblind, ctx)) goto err; + } if ( (rsa->flags & RSA_FLAG_EXT_PKEY) || ((rsa->p != NULL) && @@ -441,7 +453,7 @@ static int RSA_eay_private_encrypt(int flen, const unsigned char *from, } if (blinding) - if (!rsa_blinding_invert(blinding, local_blinding, ret, br, ctx)) + if (!rsa_blinding_invert(blinding, ret, unblind, ctx)) goto err; if (padding == RSA_X931_PADDING) @@ -480,18 +492,21 @@ err: static int RSA_eay_private_decrypt(int flen, const unsigned char *from, unsigned char *to, RSA *rsa, int padding) { - BIGNUM *f, *ret, *br; + BIGNUM *f, *ret; int j,num=0,r= -1; unsigned char *p; unsigned char *buf=NULL; BN_CTX *ctx=NULL; int local_blinding = 0; + /* Used only if the blinding structure is shared. A non-NULL unblind + * instructs rsa_blinding_convert() and rsa_blinding_invert() to store + * the unblinding factor outside the blinding structure. */ + BIGNUM *unblind = NULL; BN_BLINDING *blinding = NULL; if((ctx = BN_CTX_new()) == NULL) goto err; BN_CTX_start(ctx); f = BN_CTX_get(ctx); - br = BN_CTX_get(ctx); ret = BN_CTX_get(ctx); num = BN_num_bytes(rsa->n); buf = OPENSSL_malloc(num); @@ -529,8 +544,15 @@ static int RSA_eay_private_decrypt(int flen, const unsigned char *from, } if (blinding != NULL) - if (!rsa_blinding_convert(blinding, local_blinding, f, br, ctx)) + { + if (!local_blinding && ((unblind = BN_CTX_get(ctx)) == NULL)) + { + RSAerr(RSA_F_RSA_EAY_PRIVATE_DECRYPT,ERR_R_MALLOC_FAILURE); goto err; + } + if (!rsa_blinding_convert(blinding, f, unblind, ctx)) + goto err; + } /* do the decrypt */ if ( (rsa->flags & RSA_FLAG_EXT_PKEY) || @@ -564,7 +586,7 @@ static int RSA_eay_private_decrypt(int flen, const unsigned char *from, } if (blinding) - if (!rsa_blinding_invert(blinding, local_blinding, ret, br, ctx)) + if (!rsa_blinding_invert(blinding, ret, unblind, ctx)) goto err; p=buf; @@ -825,12 +847,12 @@ static int RSA_eay_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx) if (!BN_mod(r0,pr1,rsa->p,ctx)) goto err; /* If p < q it is occasionally possible for the correction of - * adding 'p' if r0 is negative above to leave the result still + * adding 'p' if r0 is negative above to leave the result still * negative. This can break the private key operations: the following * second correction should *always* correct this rare occurrence. * This will *never* happen with OpenSSL generated keys because - * they ensure p > q [steve] - */ + * they ensure p > q [steve] + */ if (BN_is_negative(r0)) if (!BN_add(r0,r0,rsa->p)) goto err; if (!BN_mul(r1,r0,rsa->q,ctx)) goto err; diff --git a/app/openssl/crypto/rsa/rsa_err.c b/app/openssl/crypto/rsa/rsa_err.c index cf9f1106..46e0bf99 100644 --- a/app/openssl/crypto/rsa/rsa_err.c +++ b/app/openssl/crypto/rsa/rsa_err.c @@ -1,6 +1,6 @@ /* crypto/rsa/rsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -78,6 +78,7 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_PKEY_RSA_CTRL), "PKEY_RSA_CTRL"}, {ERR_FUNC(RSA_F_PKEY_RSA_CTRL_STR), "PKEY_RSA_CTRL_STR"}, {ERR_FUNC(RSA_F_PKEY_RSA_SIGN), "PKEY_RSA_SIGN"}, +{ERR_FUNC(RSA_F_PKEY_RSA_VERIFY), "PKEY_RSA_VERIFY"}, {ERR_FUNC(RSA_F_PKEY_RSA_VERIFYRECOVER), "PKEY_RSA_VERIFYRECOVER"}, {ERR_FUNC(RSA_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"}, {ERR_FUNC(RSA_F_RSA_CHECK_KEY), "RSA_check_key"}, @@ -86,6 +87,8 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PUBLIC_ENCRYPT), "RSA_EAY_PUBLIC_ENCRYPT"}, {ERR_FUNC(RSA_F_RSA_GENERATE_KEY), "RSA_generate_key"}, +{ERR_FUNC(RSA_F_RSA_GENERATE_KEY_EX), "RSA_generate_key_ex"}, +{ERR_FUNC(RSA_F_RSA_ITEM_VERIFY), "RSA_ITEM_VERIFY"}, {ERR_FUNC(RSA_F_RSA_MEMORY_LOCK), "RSA_memory_lock"}, {ERR_FUNC(RSA_F_RSA_NEW_METHOD), "RSA_new_method"}, {ERR_FUNC(RSA_F_RSA_NULL), "RSA_NULL"}, @@ -97,6 +100,7 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_RSA_PADDING_ADD_NONE), "RSA_padding_add_none"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP), "RSA_padding_add_PKCS1_OAEP"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS), "RSA_padding_add_PKCS1_PSS"}, +{ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1), "RSA_padding_add_PKCS1_PSS_mgf1"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_1), "RSA_padding_add_PKCS1_type_1"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_2), "RSA_padding_add_PKCS1_type_2"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_SSLV23), "RSA_padding_add_SSLv23"}, @@ -109,8 +113,12 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_X931), "RSA_padding_check_X931"}, {ERR_FUNC(RSA_F_RSA_PRINT), "RSA_print"}, {ERR_FUNC(RSA_F_RSA_PRINT_FP), "RSA_print_fp"}, +{ERR_FUNC(RSA_F_RSA_PRIVATE_DECRYPT), "RSA_private_decrypt"}, +{ERR_FUNC(RSA_F_RSA_PRIVATE_ENCRYPT), "RSA_private_encrypt"}, {ERR_FUNC(RSA_F_RSA_PRIV_DECODE), "RSA_PRIV_DECODE"}, {ERR_FUNC(RSA_F_RSA_PRIV_ENCODE), "RSA_PRIV_ENCODE"}, +{ERR_FUNC(RSA_F_RSA_PUBLIC_DECRYPT), "RSA_public_decrypt"}, +{ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"}, {ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"}, {ERR_FUNC(RSA_F_RSA_SETUP_BLINDING), "RSA_setup_blinding"}, {ERR_FUNC(RSA_F_RSA_SIGN), "RSA_sign"}, @@ -118,6 +126,7 @@ static ERR_STRING_DATA RSA_str_functs[]= {ERR_FUNC(RSA_F_RSA_VERIFY), "RSA_verify"}, {ERR_FUNC(RSA_F_RSA_VERIFY_ASN1_OCTET_STRING), "RSA_verify_ASN1_OCTET_STRING"}, {ERR_FUNC(RSA_F_RSA_VERIFY_PKCS1_PSS), "RSA_verify_PKCS1_PSS"}, +{ERR_FUNC(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1), "RSA_verify_PKCS1_PSS_mgf1"}, {0,NULL} }; @@ -146,19 +155,24 @@ static ERR_STRING_DATA RSA_str_reasons[]= {ERR_REASON(RSA_R_INVALID_HEADER) ,"invalid header"}, {ERR_REASON(RSA_R_INVALID_KEYBITS) ,"invalid keybits"}, {ERR_REASON(RSA_R_INVALID_MESSAGE_LENGTH),"invalid message length"}, +{ERR_REASON(RSA_R_INVALID_MGF1_MD) ,"invalid mgf1 md"}, {ERR_REASON(RSA_R_INVALID_PADDING) ,"invalid padding"}, {ERR_REASON(RSA_R_INVALID_PADDING_MODE) ,"invalid padding mode"}, +{ERR_REASON(RSA_R_INVALID_PSS_PARAMETERS),"invalid pss parameters"}, {ERR_REASON(RSA_R_INVALID_PSS_SALTLEN) ,"invalid pss saltlen"}, +{ERR_REASON(RSA_R_INVALID_SALT_LENGTH) ,"invalid salt length"}, {ERR_REASON(RSA_R_INVALID_TRAILER) ,"invalid trailer"}, {ERR_REASON(RSA_R_INVALID_X931_DIGEST) ,"invalid x931 digest"}, {ERR_REASON(RSA_R_IQMP_NOT_INVERSE_OF_Q) ,"iqmp not inverse of q"}, {ERR_REASON(RSA_R_KEY_SIZE_TOO_SMALL) ,"key size too small"}, {ERR_REASON(RSA_R_LAST_OCTET_INVALID) ,"last octet invalid"}, {ERR_REASON(RSA_R_MODULUS_TOO_LARGE) ,"modulus too large"}, +{ERR_REASON(RSA_R_NON_FIPS_RSA_METHOD) ,"non fips rsa method"}, {ERR_REASON(RSA_R_NO_PUBLIC_EXPONENT) ,"no public exponent"}, {ERR_REASON(RSA_R_NULL_BEFORE_BLOCK_MISSING),"null before block missing"}, {ERR_REASON(RSA_R_N_DOES_NOT_EQUAL_P_Q) ,"n does not equal p q"}, {ERR_REASON(RSA_R_OAEP_DECODING_ERROR) ,"oaep decoding error"}, +{ERR_REASON(RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE),"operation not allowed in fips mode"}, {ERR_REASON(RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE),"operation not supported for this keytype"}, {ERR_REASON(RSA_R_PADDING_CHECK_FAILED) ,"padding check failed"}, {ERR_REASON(RSA_R_P_NOT_PRIME) ,"p not prime"}, @@ -169,7 +183,12 @@ static ERR_STRING_DATA RSA_str_reasons[]= {ERR_REASON(RSA_R_SSLV3_ROLLBACK_ATTACK) ,"sslv3 rollback attack"}, {ERR_REASON(RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD),"the asn1 object identifier is not known for this md"}, {ERR_REASON(RSA_R_UNKNOWN_ALGORITHM_TYPE),"unknown algorithm type"}, +{ERR_REASON(RSA_R_UNKNOWN_MASK_DIGEST) ,"unknown mask digest"}, {ERR_REASON(RSA_R_UNKNOWN_PADDING_TYPE) ,"unknown padding type"}, +{ERR_REASON(RSA_R_UNKNOWN_PSS_DIGEST) ,"unknown pss digest"}, +{ERR_REASON(RSA_R_UNSUPPORTED_MASK_ALGORITHM),"unsupported mask algorithm"}, +{ERR_REASON(RSA_R_UNSUPPORTED_MASK_PARAMETER),"unsupported mask parameter"}, +{ERR_REASON(RSA_R_UNSUPPORTED_SIGNATURE_TYPE),"unsupported signature type"}, {ERR_REASON(RSA_R_VALUE_MISSING) ,"value missing"}, {ERR_REASON(RSA_R_WRONG_SIGNATURE_LENGTH),"wrong signature length"}, {0,NULL} diff --git a/app/openssl/crypto/rsa/rsa_gen.c b/app/openssl/crypto/rsa/rsa_gen.c index 767f7ab6..42290cce 100644 --- a/app/openssl/crypto/rsa/rsa_gen.c +++ b/app/openssl/crypto/rsa/rsa_gen.c @@ -67,6 +67,9 @@ #include "cryptlib.h" #include #include +#ifdef OPENSSL_FIPS +#include +#endif static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb); @@ -77,8 +80,20 @@ static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) * now just because key-generation is part of RSA_METHOD. */ int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) { +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_GENERATE_KEY_EX, RSA_R_NON_FIPS_RSA_METHOD); + return 0; + } +#endif if(rsa->meth->rsa_keygen) return rsa->meth->rsa_keygen(rsa, bits, e_value, cb); +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_rsa_generate_key_ex(rsa, bits, e_value, cb); +#endif return rsa_builtin_keygen(rsa, bits, e_value, cb); } diff --git a/app/openssl/crypto/rsa/rsa_lib.c b/app/openssl/crypto/rsa/rsa_lib.c index de45088d..c95ceafc 100644 --- a/app/openssl/crypto/rsa/rsa_lib.c +++ b/app/openssl/crypto/rsa/rsa_lib.c @@ -67,6 +67,10 @@ #include #endif +#ifdef OPENSSL_FIPS +#include +#endif + const char RSA_version[]="RSA" OPENSSL_VERSION_PTEXT; static const RSA_METHOD *default_RSA_meth=NULL; @@ -87,11 +91,14 @@ const RSA_METHOD *RSA_get_default_method(void) { if (default_RSA_meth == NULL) { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_rsa_pkcs1_ssleay(); + else + return RSA_PKCS1_SSLeay(); +#else #ifdef RSA_NULL default_RSA_meth=RSA_null_method(); -#else -#if 0 /* was: #ifdef RSAref */ - default_RSA_meth=RSA_PKCS1_RSAref(); #else default_RSA_meth=RSA_PKCS1_SSLeay(); #endif @@ -181,7 +188,7 @@ RSA *RSA_new_method(ENGINE *engine) ret->blinding=NULL; ret->mt_blinding=NULL; ret->bignum_data=NULL; - ret->flags=ret->meth->flags; + ret->flags=ret->meth->flags & ~RSA_FLAG_NON_FIPS_ALLOW; if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_RSA, ret, &ret->ex_data)) { #ifndef OPENSSL_NO_ENGINE @@ -280,163 +287,6 @@ void *RSA_get_ex_data(const RSA *r, int idx) return(CRYPTO_get_ex_data(&r->ex_data,idx)); } -int RSA_size(const RSA *r) - { - return(BN_num_bytes(r->n)); - } - -int RSA_public_encrypt(int flen, const unsigned char *from, unsigned char *to, - RSA *rsa, int padding) - { - return(rsa->meth->rsa_pub_enc(flen, from, to, rsa, padding)); - } - -int RSA_private_encrypt(int flen, const unsigned char *from, unsigned char *to, - RSA *rsa, int padding) - { - return(rsa->meth->rsa_priv_enc(flen, from, to, rsa, padding)); - } - -int RSA_private_decrypt(int flen, const unsigned char *from, unsigned char *to, - RSA *rsa, int padding) - { - return(rsa->meth->rsa_priv_dec(flen, from, to, rsa, padding)); - } - -int RSA_public_decrypt(int flen, const unsigned char *from, unsigned char *to, - RSA *rsa, int padding) - { - return(rsa->meth->rsa_pub_dec(flen, from, to, rsa, padding)); - } - -int RSA_flags(const RSA *r) - { - return((r == NULL)?0:r->meth->flags); - } - -void RSA_blinding_off(RSA *rsa) - { - if (rsa->blinding != NULL) - { - BN_BLINDING_free(rsa->blinding); - rsa->blinding=NULL; - } - rsa->flags &= ~RSA_FLAG_BLINDING; - rsa->flags |= RSA_FLAG_NO_BLINDING; - } - -int RSA_blinding_on(RSA *rsa, BN_CTX *ctx) - { - int ret=0; - - if (rsa->blinding != NULL) - RSA_blinding_off(rsa); - - rsa->blinding = RSA_setup_blinding(rsa, ctx); - if (rsa->blinding == NULL) - goto err; - - rsa->flags |= RSA_FLAG_BLINDING; - rsa->flags &= ~RSA_FLAG_NO_BLINDING; - ret=1; -err: - return(ret); - } - -static BIGNUM *rsa_get_public_exp(const BIGNUM *d, const BIGNUM *p, - const BIGNUM *q, BN_CTX *ctx) -{ - BIGNUM *ret = NULL, *r0, *r1, *r2; - - if (d == NULL || p == NULL || q == NULL) - return NULL; - - BN_CTX_start(ctx); - r0 = BN_CTX_get(ctx); - r1 = BN_CTX_get(ctx); - r2 = BN_CTX_get(ctx); - if (r2 == NULL) - goto err; - - if (!BN_sub(r1, p, BN_value_one())) goto err; - if (!BN_sub(r2, q, BN_value_one())) goto err; - if (!BN_mul(r0, r1, r2, ctx)) goto err; - - ret = BN_mod_inverse(NULL, d, r0, ctx); -err: - BN_CTX_end(ctx); - return ret; -} - -BN_BLINDING *RSA_setup_blinding(RSA *rsa, BN_CTX *in_ctx) -{ - BIGNUM local_n; - BIGNUM *e,*n; - BN_CTX *ctx; - BN_BLINDING *ret = NULL; - - if (in_ctx == NULL) - { - if ((ctx = BN_CTX_new()) == NULL) return 0; - } - else - ctx = in_ctx; - - BN_CTX_start(ctx); - e = BN_CTX_get(ctx); - if (e == NULL) - { - RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_MALLOC_FAILURE); - goto err; - } - - if (rsa->e == NULL) - { - e = rsa_get_public_exp(rsa->d, rsa->p, rsa->q, ctx); - if (e == NULL) - { - RSAerr(RSA_F_RSA_SETUP_BLINDING, RSA_R_NO_PUBLIC_EXPONENT); - goto err; - } - } - else - e = rsa->e; - - - if ((RAND_status() == 0) && rsa->d != NULL && rsa->d->d != NULL) - { - /* if PRNG is not properly seeded, resort to secret - * exponent as unpredictable seed */ - RAND_add(rsa->d->d, rsa->d->dmax * sizeof rsa->d->d[0], 0.0); - } - - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) - { - /* Set BN_FLG_CONSTTIME flag */ - n = &local_n; - BN_with_flags(n, rsa->n, BN_FLG_CONSTTIME); - } - else - n = rsa->n; - - ret = BN_BLINDING_create_param(NULL, e, n, ctx, - rsa->meth->bn_mod_exp, rsa->_method_mod_n); - if (ret == NULL) - { - RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_BN_LIB); - goto err; - } - CRYPTO_THREADID_current(BN_BLINDING_thread_id(ret)); -err: - BN_CTX_end(ctx); - if (in_ctx == NULL) - BN_CTX_free(ctx); - if(rsa->e == NULL) - BN_free(e); - - return ret; -} - int RSA_memory_lock(RSA *r) { int i,j,k,off; diff --git a/app/openssl/crypto/rsa/rsa_oaep.c b/app/openssl/crypto/rsa/rsa_oaep.c index 18d307ea..af4d24a5 100644 --- a/app/openssl/crypto/rsa/rsa_oaep.c +++ b/app/openssl/crypto/rsa/rsa_oaep.c @@ -56,7 +56,8 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, seed = to + 1; db = to + SHA_DIGEST_LENGTH + 1; - EVP_Digest((void *)param, plen, db, NULL, EVP_sha1(), NULL); + if (!EVP_Digest((void *)param, plen, db, NULL, EVP_sha1(), NULL)) + return 0; memset(db + SHA_DIGEST_LENGTH, 0, emlen - flen - 2 * SHA_DIGEST_LENGTH - 1); db[emlen - flen - SHA_DIGEST_LENGTH - 1] = 0x01; @@ -145,9 +146,10 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, for (i = 0; i < dblen; i++) db[i] ^= maskeddb[i]; - EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL); + if (!EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL)) + return -1; - if (memcmp(db, phash, SHA_DIGEST_LENGTH) != 0 || bad) + if (CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH) != 0 || bad) goto decoding_err; else { diff --git a/app/openssl/crypto/rsa/rsa_pmeth.c b/app/openssl/crypto/rsa/rsa_pmeth.c index c6892ecd..157aa5c4 100644 --- a/app/openssl/crypto/rsa/rsa_pmeth.c +++ b/app/openssl/crypto/rsa/rsa_pmeth.c @@ -63,6 +63,12 @@ #include #include #include +#ifndef OPENSSL_NO_CMS +#include +#endif +#ifdef OPENSSL_FIPS +#include +#endif #include "evp_locl.h" #include "rsa_locl.h" @@ -79,6 +85,8 @@ typedef struct int pad_mode; /* message digest */ const EVP_MD *md; + /* message digest for MGF1 */ + const EVP_MD *mgf1md; /* PSS/OAEP salt length */ int saltlen; /* Temp buffer */ @@ -95,6 +103,7 @@ static int pkey_rsa_init(EVP_PKEY_CTX *ctx) rctx->pub_exp = NULL; rctx->pad_mode = RSA_PKCS1_PADDING; rctx->md = NULL; + rctx->mgf1md = NULL; rctx->tbuf = NULL; rctx->saltlen = -2; @@ -147,6 +156,31 @@ static void pkey_rsa_cleanup(EVP_PKEY_CTX *ctx) OPENSSL_free(rctx); } } +#ifdef OPENSSL_FIPS +/* FIP checker. Return value indicates status of context parameters: + * 1 : redirect to FIPS. + * 0 : don't redirect to FIPS. + * -1 : illegal operation in FIPS mode. + */ + +static int pkey_fips_check_ctx(EVP_PKEY_CTX *ctx) + { + RSA_PKEY_CTX *rctx = ctx->data; + RSA *rsa = ctx->pkey->pkey.rsa; + int rv = -1; + if (!FIPS_mode()) + return 0; + if (rsa->flags & RSA_FLAG_NON_FIPS_ALLOW) + rv = 0; + if (!(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) && rv) + return -1; + if (rctx->md && !(rctx->md->flags & EVP_MD_FLAG_FIPS)) + return rv; + if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) + return rv; + return 1; + } +#endif static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, const unsigned char *tbs, size_t tbslen) @@ -155,6 +189,15 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, RSA_PKEY_CTX *rctx = ctx->data; RSA *rsa = ctx->pkey->pkey.rsa; +#ifdef OPENSSL_FIPS + ret = pkey_fips_check_ctx(ctx); + if (ret < 0) + { + RSAerr(RSA_F_PKEY_RSA_SIGN, RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE); + return -1; + } +#endif + if (rctx->md) { if (tbslen != (size_t)EVP_MD_size(rctx->md)) @@ -163,7 +206,36 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, RSA_R_INVALID_DIGEST_LENGTH); return -1; } - if (rctx->pad_mode == RSA_X931_PADDING) +#ifdef OPENSSL_FIPS + if (ret > 0) + { + unsigned int slen; + ret = FIPS_rsa_sign_digest(rsa, tbs, tbslen, rctx->md, + rctx->pad_mode, + rctx->saltlen, + rctx->mgf1md, + sig, &slen); + if (ret > 0) + *siglen = slen; + else + *siglen = 0; + return ret; + } +#endif + + if (EVP_MD_type(rctx->md) == NID_mdc2) + { + unsigned int sltmp; + if (rctx->pad_mode != RSA_PKCS1_PADDING) + return -1; + ret = RSA_sign_ASN1_OCTET_STRING(NID_mdc2, + tbs, tbslen, sig, &sltmp, rsa); + + if (ret <= 0) + return ret; + ret = sltmp; + } + else if (rctx->pad_mode == RSA_X931_PADDING) { if (!setup_tbuf(rctx, ctx)) return -1; @@ -186,8 +258,10 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, { if (!setup_tbuf(rctx, ctx)) return -1; - if (!RSA_padding_add_PKCS1_PSS(rsa, rctx->tbuf, tbs, - rctx->md, rctx->saltlen)) + if (!RSA_padding_add_PKCS1_PSS_mgf1(rsa, + rctx->tbuf, tbs, + rctx->md, rctx->mgf1md, + rctx->saltlen)) return -1; ret = RSA_private_encrypt(RSA_size(rsa), rctx->tbuf, sig, rsa, RSA_NO_PADDING); @@ -269,8 +343,30 @@ static int pkey_rsa_verify(EVP_PKEY_CTX *ctx, RSA_PKEY_CTX *rctx = ctx->data; RSA *rsa = ctx->pkey->pkey.rsa; size_t rslen; +#ifdef OPENSSL_FIPS + int rv; + rv = pkey_fips_check_ctx(ctx); + if (rv < 0) + { + RSAerr(RSA_F_PKEY_RSA_VERIFY, RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE); + return -1; + } +#endif if (rctx->md) { +#ifdef OPENSSL_FIPS + if (rv > 0) + { + return FIPS_rsa_verify_digest(rsa, + tbs, tbslen, + rctx->md, + rctx->pad_mode, + rctx->saltlen, + rctx->mgf1md, + sig, siglen); + + } +#endif if (rctx->pad_mode == RSA_PKCS1_PADDING) return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen, sig, siglen, rsa); @@ -289,7 +385,8 @@ static int pkey_rsa_verify(EVP_PKEY_CTX *ctx, rsa, RSA_NO_PADDING); if (ret <= 0) return 0; - ret = RSA_verify_PKCS1_PSS(rsa, tbs, rctx->md, + ret = RSA_verify_PKCS1_PSS_mgf1(rsa, tbs, + rctx->md, rctx->mgf1md, rctx->tbuf, rctx->saltlen); if (ret <= 0) return 0; @@ -403,15 +500,25 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE); return -2; + case EVP_PKEY_CTRL_GET_RSA_PADDING: + *(int *)p2 = rctx->pad_mode; + return 1; + case EVP_PKEY_CTRL_RSA_PSS_SALTLEN: - if (p1 < -2) - return -2; + case EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN: if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) { RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PSS_SALTLEN); return -2; } - rctx->saltlen = p1; + if (type == EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN) + *(int *)p2 = rctx->saltlen; + else + { + if (p1 < -2) + return -2; + rctx->saltlen = p1; + } return 1; case EVP_PKEY_CTRL_RSA_KEYGEN_BITS: @@ -435,16 +542,45 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) rctx->md = p2; return 1; + case EVP_PKEY_CTRL_RSA_MGF1_MD: + case EVP_PKEY_CTRL_GET_RSA_MGF1_MD: + if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) + { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_MGF1_MD); + return -2; + } + if (type == EVP_PKEY_CTRL_GET_RSA_MGF1_MD) + { + if (rctx->mgf1md) + *(const EVP_MD **)p2 = rctx->mgf1md; + else + *(const EVP_MD **)p2 = rctx->md; + } + else + rctx->mgf1md = p2; + return 1; + case EVP_PKEY_CTRL_DIGESTINIT: case EVP_PKEY_CTRL_PKCS7_ENCRYPT: case EVP_PKEY_CTRL_PKCS7_DECRYPT: case EVP_PKEY_CTRL_PKCS7_SIGN: + return 1; #ifndef OPENSSL_NO_CMS - case EVP_PKEY_CTRL_CMS_ENCRYPT: case EVP_PKEY_CTRL_CMS_DECRYPT: + { + X509_ALGOR *alg = NULL; + ASN1_OBJECT *encalg = NULL; + if (p2) + CMS_RecipientInfo_ktri_get0_algs(p2, NULL, NULL, &alg); + if (alg) + X509_ALGOR_get0(&encalg, NULL, NULL, alg); + if (encalg && OBJ_obj2nid(encalg) == NID_rsaesOaep) + rctx->pad_mode = RSA_PKCS1_OAEP_PADDING; + } + case EVP_PKEY_CTRL_CMS_ENCRYPT: case EVP_PKEY_CTRL_CMS_SIGN: -#endif return 1; +#endif case EVP_PKEY_CTRL_PEER_KEY: RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); @@ -475,6 +611,8 @@ static int pkey_rsa_ctrl_str(EVP_PKEY_CTX *ctx, pm = RSA_NO_PADDING; else if (!strcmp(value, "oeap")) pm = RSA_PKCS1_OAEP_PADDING; + else if (!strcmp(value, "oaep")) + pm = RSA_PKCS1_OAEP_PADDING; else if (!strcmp(value, "x931")) pm = RSA_X931_PADDING; else if (!strcmp(value, "pss")) diff --git a/app/openssl/crypto/rsa/rsa_pss.c b/app/openssl/crypto/rsa/rsa_pss.c index ac211e2f..5f9f533d 100644 --- a/app/openssl/crypto/rsa/rsa_pss.c +++ b/app/openssl/crypto/rsa/rsa_pss.c @@ -73,6 +73,13 @@ static const unsigned char zeroes[] = {0,0,0,0,0,0,0,0}; int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, const EVP_MD *Hash, const unsigned char *EM, int sLen) { + return RSA_verify_PKCS1_PSS_mgf1(rsa, mHash, Hash, NULL, EM, sLen); + } + +int RSA_verify_PKCS1_PSS_mgf1(RSA *rsa, const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, + const unsigned char *EM, int sLen) + { int i; int ret = 0; int hLen, maskedDBLen, MSBits, emLen; @@ -80,6 +87,10 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, unsigned char *DB = NULL; EVP_MD_CTX ctx; unsigned char H_[EVP_MAX_MD_SIZE]; + EVP_MD_CTX_init(&ctx); + + if (mgf1Hash == NULL) + mgf1Hash = Hash; hLen = EVP_MD_size(Hash); if (hLen < 0) @@ -94,7 +105,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, else if (sLen == -2) sLen = -2; else if (sLen < -2) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED); goto err; } @@ -102,7 +113,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, emLen = RSA_size(rsa); if (EM[0] & (0xFF << MSBits)) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_FIRST_OCTET_INVALID); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_FIRST_OCTET_INVALID); goto err; } if (MSBits == 0) @@ -112,12 +123,12 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, } if (emLen < (hLen + sLen + 2)) /* sLen can be small negative */ { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_DATA_TOO_LARGE); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_DATA_TOO_LARGE); goto err; } if (EM[emLen - 1] != 0xbc) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_LAST_OCTET_INVALID); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_LAST_OCTET_INVALID); goto err; } maskedDBLen = emLen - hLen - 1; @@ -125,10 +136,10 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, DB = OPENSSL_malloc(maskedDBLen); if (!DB) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, ERR_R_MALLOC_FAILURE); goto err; } - if (PKCS1_MGF1(DB, maskedDBLen, H, hLen, Hash) < 0) + if (PKCS1_MGF1(DB, maskedDBLen, H, hLen, mgf1Hash) < 0) goto err; for (i = 0; i < maskedDBLen; i++) DB[i] ^= EM[i]; @@ -137,25 +148,28 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, for (i = 0; DB[i] == 0 && i < (maskedDBLen-1); i++) ; if (DB[i++] != 0x1) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_RECOVERY_FAILED); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_RECOVERY_FAILED); goto err; } if (sLen >= 0 && (maskedDBLen - i) != sLen) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED); goto err; } - EVP_MD_CTX_init(&ctx); - EVP_DigestInit_ex(&ctx, Hash, NULL); - EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes); - EVP_DigestUpdate(&ctx, mHash, hLen); + if (!EVP_DigestInit_ex(&ctx, Hash, NULL) + || !EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes) + || !EVP_DigestUpdate(&ctx, mHash, hLen)) + goto err; if (maskedDBLen - i) - EVP_DigestUpdate(&ctx, DB + i, maskedDBLen - i); - EVP_DigestFinal(&ctx, H_, NULL); - EVP_MD_CTX_cleanup(&ctx); + { + if (!EVP_DigestUpdate(&ctx, DB + i, maskedDBLen - i)) + goto err; + } + if (!EVP_DigestFinal_ex(&ctx, H_, NULL)) + goto err; if (memcmp(H_, H, hLen)) { - RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_BAD_SIGNATURE); + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_BAD_SIGNATURE); ret = 0; } else @@ -164,6 +178,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, err: if (DB) OPENSSL_free(DB); + EVP_MD_CTX_cleanup(&ctx); return ret; @@ -173,12 +188,22 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, const unsigned char *mHash, const EVP_MD *Hash, int sLen) { + return RSA_padding_add_PKCS1_PSS_mgf1(rsa, EM, mHash, Hash, NULL, sLen); + } + +int RSA_padding_add_PKCS1_PSS_mgf1(RSA *rsa, unsigned char *EM, + const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, int sLen) + { int i; int ret = 0; int hLen, maskedDBLen, MSBits, emLen; unsigned char *H, *salt = NULL, *p; EVP_MD_CTX ctx; + if (mgf1Hash == NULL) + mgf1Hash = Hash; + hLen = EVP_MD_size(Hash); if (hLen < 0) goto err; @@ -192,7 +217,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, else if (sLen == -2) sLen = -2; else if (sLen < -2) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED); goto err; } @@ -209,8 +234,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, } else if (emLen < (hLen + sLen + 2)) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, - RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1,RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); goto err; } if (sLen > 0) @@ -218,8 +242,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, salt = OPENSSL_malloc(sLen); if (!salt) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, - ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1,ERR_R_MALLOC_FAILURE); goto err; } if (RAND_bytes(salt, sLen) <= 0) @@ -228,16 +251,18 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, maskedDBLen = emLen - hLen - 1; H = EM + maskedDBLen; EVP_MD_CTX_init(&ctx); - EVP_DigestInit_ex(&ctx, Hash, NULL); - EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes); - EVP_DigestUpdate(&ctx, mHash, hLen); - if (sLen) - EVP_DigestUpdate(&ctx, salt, sLen); - EVP_DigestFinal(&ctx, H, NULL); + if (!EVP_DigestInit_ex(&ctx, Hash, NULL) + || !EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes) + || !EVP_DigestUpdate(&ctx, mHash, hLen)) + goto err; + if (sLen && !EVP_DigestUpdate(&ctx, salt, sLen)) + goto err; + if (!EVP_DigestFinal_ex(&ctx, H, NULL)) + goto err; EVP_MD_CTX_cleanup(&ctx); /* Generate dbMask in place then perform XOR on it */ - if (PKCS1_MGF1(EM, maskedDBLen, H, hLen, Hash)) + if (PKCS1_MGF1(EM, maskedDBLen, H, hLen, mgf1Hash)) goto err; p = EM; diff --git a/app/openssl/crypto/rsa/rsa_sign.c b/app/openssl/crypto/rsa/rsa_sign.c index 0be4ec7f..b6f6037a 100644 --- a/app/openssl/crypto/rsa/rsa_sign.c +++ b/app/openssl/crypto/rsa/rsa_sign.c @@ -77,6 +77,14 @@ int RSA_sign(int type, const unsigned char *m, unsigned int m_len, const unsigned char *s = NULL; X509_ALGOR algor; ASN1_OCTET_STRING digest; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_RSA_SIGN, RSA_R_NON_FIPS_RSA_METHOD); + return 0; + } +#endif if((rsa->flags & RSA_FLAG_SIGN_VER) && rsa->meth->rsa_sign) { return rsa->meth->rsa_sign(type, m, m_len, @@ -153,6 +161,15 @@ int int_rsa_verify(int dtype, const unsigned char *m, unsigned char *s; X509_SIG *sig=NULL; +#ifdef OPENSSL_FIPS + if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) + && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)) + { + RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_NON_FIPS_RSA_METHOD); + return 0; + } +#endif + if (siglen != (unsigned int)RSA_size(rsa)) { RSAerr(RSA_F_INT_RSA_VERIFY,RSA_R_WRONG_SIGNATURE_LENGTH); @@ -182,6 +199,22 @@ int int_rsa_verify(int dtype, const unsigned char *m, i=RSA_public_decrypt((int)siglen,sigbuf,s,rsa,RSA_PKCS1_PADDING); if (i <= 0) goto err; + /* Oddball MDC2 case: signature can be OCTET STRING. + * check for correct tag and length octets. + */ + if (dtype == NID_mdc2 && i == 18 && s[0] == 0x04 && s[1] == 0x10) + { + if (rm) + { + memcpy(rm, s + 2, 16); + *prm_len = 16; + ret = 1; + } + else if(memcmp(m, s + 2, 16)) + RSAerr(RSA_F_INT_RSA_VERIFY,RSA_R_BAD_SIGNATURE); + else + ret = 1; + } /* Special case: SSL signature */ if(dtype == NID_md5_sha1) { diff --git a/app/openssl/crypto/s390xcap.c b/app/openssl/crypto/s390xcap.c index ffbe0235..f2e94ef4 100644 --- a/app/openssl/crypto/s390xcap.c +++ b/app/openssl/crypto/s390xcap.c @@ -4,7 +4,7 @@ #include #include -extern unsigned long OPENSSL_s390xcap_P; +extern unsigned long OPENSSL_s390xcap_P[]; static sigjmp_buf ill_jmp; static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } @@ -16,7 +16,9 @@ void OPENSSL_cpuid_setup(void) sigset_t oset; struct sigaction ill_act,oact; - if (OPENSSL_s390xcap_P) return; + if (OPENSSL_s390xcap_P[0]) return; + + OPENSSL_s390xcap_P[0] = 1UL<<(8*sizeof(unsigned long)-1); memset(&ill_act,0,sizeof(ill_act)); ill_act.sa_handler = ill_handler; @@ -27,10 +29,8 @@ void OPENSSL_cpuid_setup(void) sigaction (SIGILL,&ill_act,&oact); /* protection against missing store-facility-list-extended */ - if (sigsetjmp(ill_jmp,0) == 0) - OPENSSL_s390xcap_P = OPENSSL_s390x_facilities(); - else - OPENSSL_s390xcap_P = 1UL<<63; + if (sigsetjmp(ill_jmp,1) == 0) + OPENSSL_s390x_facilities(); sigaction (SIGILL,&oact,NULL); sigprocmask(SIG_SETMASK,&oset,NULL); diff --git a/app/openssl/crypto/s390xcpuid.S b/app/openssl/crypto/s390xcpuid.S index b053c6a2..06815347 100644 --- a/app/openssl/crypto/s390xcpuid.S +++ b/app/openssl/crypto/s390xcpuid.S @@ -5,10 +5,14 @@ .align 16 OPENSSL_s390x_facilities: lghi %r0,0 - .long 0xb2b0f010 # stfle 16(%r15) - lg %r2,16(%r15) - larl %r1,OPENSSL_s390xcap_P - stg %r2,0(%r1) + larl %r2,OPENSSL_s390xcap_P + stg %r0,8(%r2) + .long 0xb2b02000 # stfle 0(%r2) + brc 8,.Ldone + lghi %r0,1 + .long 0xb2b02000 # stfle 0(%r2) +.Ldone: + lg %r2,0(%r2) br %r14 .size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities @@ -58,6 +62,9 @@ OPENSSL_wipe_cpu: .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: +#if !defined(__s390x__) && !defined(__s390x) + llgfr %r3,%r3 +#endif lghi %r4,15 lghi %r0,0 clgr %r3,%r4 @@ -89,4 +96,4 @@ OPENSSL_cleanse: .section .init brasl %r14,OPENSSL_cpuid_setup -.comm OPENSSL_s390xcap_P,8,8 +.comm OPENSSL_s390xcap_P,16,8 diff --git a/app/openssl/crypto/sha/asm/sha1-586.S b/app/openssl/crypto/sha/asm/sha1-586.S new file mode 100644 index 00000000..47bef2a9 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-586.S @@ -0,0 +1,2639 @@ +.file "sha1-586.s" +.text +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: +.L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal _GLOBAL_OFFSET_TABLE_+[.-.L000pic_point](%ebp),%esi + movl OPENSSL_ia32cap_P@GOT(%esi),%esi + leal .LK_XX_XX-.L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz .L001x86 + testl $16777216,%eax + jz .L001x86 + jmp .Lssse3_shortcut +.align 16 +.L001x86: + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L002loop +.align 16 +.L002loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L002loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order,.-.L_sha1_block_data_order_begin +.type _sha1_block_data_order_ssse3,@function +.align 16 +_sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal .LK_XX_XX-.L003pic_point(%ebp),%ebp +.Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + psubd %xmm7,%xmm2 + movdqa %xmm1,%xmm4 + jmp .L004loop +.align 16 +.L004loop: + addl (%esp),%edi + xorl %edx,%ecx +.byte 102,15,58,15,224,8 + movdqa %xmm3,%xmm6 + movl %eax,%ebp + roll $5,%eax + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm6 + xorl %edx,%esi + addl %eax,%edi + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%edi + pxor %xmm2,%xmm6 + addl 4(%esp),%edx + xorl %ecx,%ebx + movl %edi,%esi + roll $5,%edi + pxor %xmm6,%xmm4 + andl %ebx,%ebp + xorl %ecx,%ebx + movdqa %xmm7,48(%esp) + xorl %ecx,%ebp + addl %edi,%edx + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm6 + rorl $7,%eax + addl %ebp,%edx + addl 8(%esp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm6 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm0,%xmm7 + rorl $7,%edi + addl %esi,%ecx + psrld $30,%xmm0 + por %xmm6,%xmm4 + addl 12(%esp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm7 + pxor %xmm0,%xmm4 + andl %edi,%ebp + xorl %eax,%edi + movdqa 96(%esp),%xmm0 + xorl %eax,%ebp + addl %ecx,%ebx + pxor %xmm7,%xmm4 + movdqa %xmm2,%xmm5 + rorl $7,%edx + addl %ebp,%ebx + addl 16(%esp),%eax + xorl %edi,%edx +.byte 102,15,58,15,233,8 + movdqa %xmm4,%xmm7 + movl %ebx,%ebp + roll $5,%ebx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + andl %edx,%esi + xorl %edi,%edx + psrldq $4,%xmm7 + xorl %edi,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm7 + addl 20(%esp),%edi + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm7,%xmm5 + andl %ecx,%ebp + xorl %edx,%ecx + movdqa %xmm0,(%esp) + xorl %edx,%ebp + addl %eax,%edi + movdqa %xmm5,%xmm1 + movdqa %xmm5,%xmm7 + rorl $7,%ebx + addl %ebp,%edi + addl 24(%esp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + roll $5,%edi + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm7 + xorl %ecx,%esi + addl %edi,%edx + movdqa %xmm1,%xmm0 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm1 + por %xmm7,%xmm5 + addl 28(%esp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm0 + pxor %xmm1,%xmm5 + andl %eax,%ebp + xorl %ebx,%eax + movdqa 112(%esp),%xmm1 + xorl %ebx,%ebp + addl %edx,%ecx + pxor %xmm0,%xmm5 + movdqa %xmm3,%xmm6 + rorl $7,%edi + addl %ebp,%ecx + addl 32(%esp),%ebx + xorl %eax,%edi +.byte 102,15,58,15,242,8 + movdqa %xmm5,%xmm0 + movl %ecx,%ebp + roll $5,%ecx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + andl %edi,%esi + xorl %eax,%edi + psrldq $4,%xmm0 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm0 + addl 36(%esp),%eax + xorl %edi,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm0,%xmm6 + andl %edx,%ebp + xorl %edi,%edx + movdqa %xmm1,16(%esp) + xorl %edi,%ebp + addl %ebx,%eax + movdqa %xmm6,%xmm2 + movdqa %xmm6,%xmm0 + rorl $7,%ecx + addl %ebp,%eax + addl 40(%esp),%edi + xorl %edx,%ecx + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm0 + xorl %edx,%esi + addl %eax,%edi + movdqa %xmm2,%xmm1 + rorl $7,%ebx + addl %esi,%edi + psrld $30,%xmm2 + por %xmm0,%xmm6 + addl 44(%esp),%edx + xorl %ecx,%ebx + movdqa 64(%esp),%xmm0 + movl %edi,%esi + roll $5,%edi + pslld $2,%xmm1 + pxor %xmm2,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + movdqa 112(%esp),%xmm2 + xorl %ecx,%ebp + addl %edi,%edx + pxor %xmm1,%xmm6 + movdqa %xmm4,%xmm7 + rorl $7,%eax + addl %ebp,%edx + addl 48(%esp),%ecx + xorl %ebx,%eax +.byte 102,15,58,15,251,8 + movdqa %xmm6,%xmm1 + movl %edx,%ebp + roll $5,%edx + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm1 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%edi + addl %esi,%ecx + pxor %xmm5,%xmm1 + addl 52(%esp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + pxor %xmm1,%xmm7 + andl %edi,%ebp + xorl %eax,%edi + movdqa %xmm2,32(%esp) + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm7,%xmm3 + movdqa %xmm7,%xmm1 + rorl $7,%edx + addl %ebp,%ebx + addl 56(%esp),%eax + xorl %edi,%edx + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + roll $5,%ebx + andl %edx,%esi + xorl %edi,%edx + psrld $31,%xmm1 + xorl %edi,%esi + addl %ebx,%eax + movdqa %xmm3,%xmm2 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm3 + por %xmm1,%xmm7 + addl 60(%esp),%edi + xorl %edx,%ecx + movdqa 80(%esp),%xmm1 + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm2 + pxor %xmm3,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + movdqa 112(%esp),%xmm3 + xorl %edx,%ebp + addl %eax,%edi + pxor %xmm2,%xmm7 + rorl $7,%ebx + addl %ebp,%edi + movdqa %xmm7,%xmm2 + addl (%esp),%edx + pxor %xmm4,%xmm0 +.byte 102,15,58,15,214,8 + xorl %ecx,%ebx + movl %edi,%ebp + roll $5,%edi + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm3,%xmm4 + paddd %xmm7,%xmm3 + xorl %ecx,%esi + addl %edi,%edx + pxor %xmm2,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%esp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + roll $5,%edx + andl %eax,%ebp + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%ebp + addl %edx,%ecx + psrld $30,%xmm2 + rorl $7,%edi + addl %ebp,%ecx + addl 8(%esp),%ebx + xorl %eax,%edi + movl %ecx,%ebp + roll $5,%ecx + por %xmm2,%xmm0 + andl %edi,%esi + xorl %eax,%edi + movdqa 96(%esp),%xmm2 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%esp),%eax + movdqa %xmm0,%xmm3 + xorl %edi,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%ebp + xorl %edi,%edx + xorl %edi,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 +.byte 102,15,58,15,223,8 + xorl %edx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + xorl %ecx,%esi + addl %eax,%edi + movdqa %xmm4,%xmm5 + paddd %xmm0,%xmm4 + rorl $7,%ebx + addl %esi,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ecx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %ebx,%ebp + addl %edi,%edx + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %ebx,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%edi + addl %esi,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %eax,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + xorl %edi,%ebp + addl %ecx,%ebx + rorl $7,%edx + movdqa %xmm1,%xmm4 + addl %ebp,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 +.byte 102,15,58,15,224,8 + xorl %edi,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + xorl %edx,%esi + addl %ebx,%eax + movdqa 128(%esp),%xmm6 + paddd %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %edx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ecx,%ebp + addl %eax,%edi + rorl $7,%ebx + addl %ebp,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ecx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + xorl %ebx,%esi + addl %edi,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %ebx,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edi + movdqa %xmm2,%xmm5 + addl %ebp,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 +.byte 102,15,58,15,233,8 + xorl %eax,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + xorl %edi,%esi + addl %ecx,%ebx + movdqa %xmm6,%xmm7 + paddd %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %edx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + xorl %ecx,%esi + addl %eax,%edi + rorl $7,%ebx + addl %esi,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ecx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + xorl %ebx,%ebp + addl %edi,%edx + rorl $7,%eax + movdqa %xmm3,%xmm6 + addl %ebp,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 +.byte 102,15,58,15,242,8 + xorl %ebx,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm7,%xmm0 + paddd %xmm3,%xmm7 + rorl $7,%edi + addl %esi,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + xorl %edi,%ebp + addl %ecx,%ebx + rorl $7,%edx + addl %ebp,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edi,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %edx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%ebp + addl %eax,%edi + rorl $7,%ebx + movdqa %xmm4,%xmm7 + addl %ebp,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 +.byte 102,15,58,15,251,8 + xorl %ecx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + xorl %ebx,%esi + addl %edi,%edx + movdqa %xmm0,%xmm1 + paddd %xmm4,%xmm0 + rorl $7,%eax + addl %esi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %ebx,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edi + addl %ebp,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %eax,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + xorl %edi,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + xorl %edi,%ebp + movdqa 80(%esp),%xmm7 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + movdqa %xmm5,%xmm0 + addl %ebp,%eax + movl %ecx,%ebp + pxor %xmm2,%xmm6 +.byte 102,15,58,15,196,8 + xorl %edx,%ecx + addl 32(%esp),%edi + andl %edx,%ebp + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm1,%xmm2 + paddd %xmm5,%xmm1 + addl %ebp,%edi + movl %eax,%ebp + pxor %xmm0,%xmm6 + roll $5,%eax + addl %esi,%edi + xorl %edx,%ecx + addl %eax,%edi + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%esp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%ebp + rorl $7,%eax + psrld $30,%xmm0 + addl %esi,%edx + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ecx,%ebx + addl %edi,%edx + por %xmm0,%xmm6 + movl %eax,%ebp + xorl %ebx,%eax + movdqa 96(%esp),%xmm0 + addl 40(%esp),%ecx + andl %ebx,%ebp + andl %eax,%esi + rorl $7,%edi + addl %ebp,%ecx + movdqa %xmm6,%xmm1 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %edi,%esi + xorl %eax,%edi + addl 44(%esp),%ebx + andl %eax,%esi + andl %edi,%ebp + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %eax,%edi + addl %ecx,%ebx + movl %edx,%ebp + pxor %xmm3,%xmm7 +.byte 102,15,58,15,205,8 + xorl %edi,%edx + addl 48(%esp),%eax + andl %edi,%ebp + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + andl %edx,%esi + rorl $7,%ecx + movdqa 144(%esp),%xmm3 + paddd %xmm6,%xmm2 + addl %ebp,%eax + movl %ebx,%ebp + pxor %xmm1,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %edi,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%esp),%edi + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%ebp + rorl $7,%ebx + psrld $30,%xmm1 + addl %esi,%edi + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %edx,%ecx + addl %eax,%edi + por %xmm1,%xmm7 + movl %ebx,%ebp + xorl %ecx,%ebx + movdqa 64(%esp),%xmm1 + addl 56(%esp),%edx + andl %ecx,%ebp + andl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + movdqa %xmm7,%xmm2 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ecx,%ebx + addl %edi,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%esp),%ecx + andl %ebx,%esi + andl %eax,%ebp + rorl $7,%edi + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %edi,%ebp + pxor %xmm4,%xmm0 +.byte 102,15,58,15,214,8 + xorl %eax,%edi + addl (%esp),%ebx + andl %eax,%ebp + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + andl %edi,%esi + rorl $7,%edx + movdqa %xmm3,%xmm4 + paddd %xmm7,%xmm3 + addl %ebp,%ebx + movl %ecx,%ebp + pxor %xmm2,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + xorl %edi,%edx + addl 4(%esp),%eax + andl %edi,%esi + pslld $2,%xmm0 + andl %edx,%ebp + rorl $7,%ecx + psrld $30,%xmm2 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edi,%edx + addl %ebx,%eax + por %xmm2,%xmm0 + movl %ecx,%ebp + xorl %edx,%ecx + movdqa 80(%esp),%xmm2 + addl 8(%esp),%edi + andl %edx,%ebp + andl %ecx,%esi + rorl $7,%ebx + addl %ebp,%edi + movdqa %xmm0,%xmm3 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %edx,%ecx + addl %eax,%edi + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%esp),%edx + andl %ecx,%esi + andl %ebx,%ebp + rorl $7,%eax + addl %esi,%edx + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ecx,%ebx + addl %edi,%edx + movl %eax,%ebp + pxor %xmm5,%xmm1 +.byte 102,15,58,15,223,8 + xorl %ebx,%eax + addl 16(%esp),%ecx + andl %ebx,%ebp + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + andl %eax,%esi + rorl $7,%edi + movdqa %xmm4,%xmm5 + paddd %xmm0,%xmm4 + addl %ebp,%ecx + movl %edx,%ebp + pxor %xmm3,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + movl %edi,%esi + xorl %eax,%edi + addl 20(%esp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %edi,%ebp + rorl $7,%edx + psrld $30,%xmm3 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %eax,%edi + addl %ecx,%ebx + por %xmm3,%xmm1 + movl %edx,%ebp + xorl %edi,%edx + movdqa 96(%esp),%xmm3 + addl 24(%esp),%eax + andl %edi,%ebp + andl %edx,%esi + rorl $7,%ecx + addl %ebp,%eax + movdqa %xmm1,%xmm4 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edi,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%esp),%edi + andl %edx,%esi + andl %ecx,%ebp + rorl $7,%ebx + addl %esi,%edi + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %edx,%ecx + addl %eax,%edi + movl %ebx,%ebp + pxor %xmm6,%xmm2 +.byte 102,15,58,15,224,8 + xorl %ecx,%ebx + addl 32(%esp),%edx + andl %ecx,%ebp + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm5,%xmm6 + paddd %xmm1,%xmm5 + addl %ebp,%edx + movl %edi,%ebp + pxor %xmm4,%xmm2 + roll $5,%edi + addl %esi,%edx + xorl %ecx,%ebx + addl %edi,%edx + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%esp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%ebp + rorl $7,%edi + psrld $30,%xmm4 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm4,%xmm2 + movl %edi,%ebp + xorl %eax,%edi + movdqa 64(%esp),%xmm4 + addl 40(%esp),%ebx + andl %eax,%ebp + andl %edi,%esi + rorl $7,%edx + addl %ebp,%ebx + movdqa %xmm2,%xmm5 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%edi + addl %ecx,%ebx + movl %edx,%esi + xorl %edi,%edx + addl 44(%esp),%eax + andl %edi,%esi + andl %edx,%ebp + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edi,%edx + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 +.byte 102,15,58,15,233,8 + xorl %edx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + xorl %ecx,%esi + addl %eax,%edi + movdqa %xmm6,%xmm7 + paddd %xmm2,%xmm6 + rorl $7,%ebx + addl %esi,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ecx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + xorl %ebx,%ebp + addl %edi,%edx + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %ebx,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%edi + addl %esi,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + xorl %edi,%ebp + addl %ecx,%ebx + rorl $7,%edx + addl %ebp,%ebx + addl (%esp),%eax + paddd %xmm3,%xmm7 + xorl %edi,%esi + movl %ebx,%ebp + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm7,48(%esp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%esp),%edi + xorl %edx,%ebp + movl %eax,%esi + roll $5,%eax + xorl %ecx,%ebp + addl %eax,%edi + rorl $7,%ebx + addl %ebp,%edi + addl 8(%esp),%edx + xorl %ecx,%esi + movl %edi,%ebp + roll $5,%edi + xorl %ebx,%esi + addl %edi,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%esp),%ecx + xorl %ebx,%ebp + movl %edx,%esi + roll $5,%edx + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edi + addl %ebp,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L005done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%ebp + roll $5,%ecx + paddd %xmm7,%xmm0 + xorl %edi,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,(%esp) + addl 20(%esp),%eax + xorl %edi,%ebp + psubd %xmm7,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + addl 24(%esp),%edi + xorl %edx,%esi + movl %eax,%ebp + roll $5,%eax + xorl %ecx,%esi + addl %eax,%edi + rorl $7,%ebx + addl %esi,%edi + addl 28(%esp),%edx + xorl %ecx,%ebp + movl %edi,%esi + roll $5,%edi + xorl %ebx,%ebp + addl %edi,%edx + rorl $7,%eax + addl %ebp,%edx + addl 32(%esp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%ebp + roll $5,%edx + paddd %xmm7,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%edi + addl %esi,%ecx + movdqa %xmm1,16(%esp) + addl 36(%esp),%ebx + xorl %eax,%ebp + psubd %xmm7,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %edi,%ebp + addl %ecx,%ebx + rorl $7,%edx + addl %ebp,%ebx + addl 40(%esp),%eax + xorl %edi,%esi + movl %ebx,%ebp + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%esp),%edi + xorl %edx,%ebp + movl %eax,%esi + roll $5,%eax + xorl %ecx,%ebp + addl %eax,%edi + rorl $7,%ebx + addl %ebp,%edi + addl 48(%esp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %edi,%ebp + roll $5,%edi + paddd %xmm7,%xmm2 + xorl %ebx,%esi + addl %edi,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%esp) + addl 52(%esp),%ecx + xorl %ebx,%ebp + psubd %xmm7,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edi + addl %ebp,%ecx + addl 56(%esp),%ebx + xorl %eax,%esi + movl %ecx,%ebp + roll $5,%ecx + xorl %edi,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %esi,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movdqa %xmm1,%xmm4 + jmp .L004loop +.align 16 +.L005done: + addl 16(%esp),%ebx + xorl %eax,%esi + movl %ecx,%ebp + roll $5,%ecx + xorl %edi,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + addl 24(%esp),%edi + xorl %edx,%esi + movl %eax,%ebp + roll $5,%eax + xorl %ecx,%esi + addl %eax,%edi + rorl $7,%ebx + addl %esi,%edi + addl 28(%esp),%edx + xorl %ecx,%ebp + movl %edi,%esi + roll $5,%edi + xorl %ebx,%ebp + addl %edi,%edx + rorl $7,%eax + addl %ebp,%edx + addl 32(%esp),%ecx + xorl %ebx,%esi + movl %edx,%ebp + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%edi + addl %esi,%ecx + addl 36(%esp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + xorl %edi,%ebp + addl %ecx,%ebx + rorl $7,%edx + addl %ebp,%ebx + addl 40(%esp),%eax + xorl %edi,%esi + movl %ebx,%ebp + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%esp),%edi + xorl %edx,%ebp + movl %eax,%esi + roll $5,%eax + xorl %ecx,%ebp + addl %eax,%edi + rorl $7,%ebx + addl %ebp,%edi + addl 48(%esp),%edx + xorl %ecx,%esi + movl %edi,%ebp + roll $5,%edi + xorl %ebx,%esi + addl %edi,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%esp),%ecx + xorl %ebx,%ebp + movl %edx,%esi + roll $5,%edx + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edi + addl %ebp,%ecx + addl 56(%esp),%ebx + xorl %eax,%esi + movl %ecx,%ebp + roll $5,%ecx + xorl %edi,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%ebp + addl %ebx,%eax + rorl $7,%ecx + addl %ebp,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.align 64 +.LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/sha/asm/sha1-586.pl b/app/openssl/crypto/sha/asm/sha1-586.pl index a1f87628..2b119ffa 100644 --- a/app/openssl/crypto/sha/asm/sha1-586.pl +++ b/app/openssl/crypto/sha/asm/sha1-586.pl @@ -12,6 +12,8 @@ # commentary below], and in 2006 the rest was rewritten in order to # gain freedom to liberate licensing terms. +# January, September 2004. +# # It was noted that Intel IA-32 C compiler generates code which # performs ~30% *faster* on P4 CPU than original *hand-coded* # SHA1 assembler implementation. To address this problem (and @@ -31,12 +33,92 @@ # ---------------------------------------------------------------- # +# August 2009. +# +# George Spelvin has tipped that F_40_59(b,c,d) can be rewritten as +# '(c&d) + (b&(c^d))', which allows to accumulate partial results +# and lighten "pressure" on scratch registers. This resulted in +# >12% performance improvement on contemporary AMD cores (with no +# degradation on other CPUs:-). Also, the code was revised to maximize +# "distance" between instructions producing input to 'lea' instruction +# and the 'lea' instruction itself, which is essential for Intel Atom +# core and resulted in ~15% improvement. + +# October 2010. +# +# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it +# is to offload message schedule denoted by Wt in NIST specification, +# or Xupdate in OpenSSL source, to SIMD unit. The idea is not novel, +# and in SSE2 context was first explored by Dean Gaudet in 2004, see +# http://arctic.org/~dean/crypto/sha1.html. Since then several things +# have changed that made it interesting again: +# +# a) XMM units became faster and wider; +# b) instruction set became more versatile; +# c) an important observation was made by Max Locktykhin, which made +# it possible to reduce amount of instructions required to perform +# the operation in question, for further details see +# http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/. + +# April 2011. +# +# Add AVX code path, probably most controversial... The thing is that +# switch to AVX alone improves performance by as little as 4% in +# comparison to SSSE3 code path. But below result doesn't look like +# 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as +# pair of µ-ops, and it's the additional µ-ops, two per round, that +# make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded +# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with +# equivalent 'sh[rl]d' that is responsible for the impressive 5.1 +# cycles per processed byte. But 'sh[rl]d' is not something that used +# to be fast, nor does it appear to be fast in upcoming Bulldozer +# [according to its optimization manual]. Which is why AVX code path +# is guarded by *both* AVX and synthetic bit denoting Intel CPUs. +# One can argue that it's unfair to AMD, but without 'sh[rl]d' it +# makes no sense to keep the AVX code path. If somebody feels that +# strongly, it's probably more appropriate to discuss possibility of +# using vector rotate XOP on AMD... + +###################################################################### +# Current performance is summarized in following table. Numbers are +# CPU clock cycles spent to process single byte (less is better). +# +# x86 SSSE3 AVX +# Pentium 15.7 - +# PIII 11.5 - +# P4 10.6 - +# AMD K8 7.1 - +# Core2 7.3 6.1/+20% - +# Atom 12.5 9.5(*)/+32% - +# Westmere 7.3 5.6/+30% - +# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+70% +# +# (*) Loop is 1056 instructions long and expected result is ~8.25. +# It remains mystery [to me] why ILP is limited to 1.7. +# +# (**) As per above comment, the result is for AVX *plus* sh[rl]d. + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386"); +$xmm=$ymm=0; +for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); } + +$ymm=1 if ($xmm && + `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/ && + $1>=2.19); # first version supporting AVX + +$ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32n" && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && + $1>=2.03); # first version supporting AVX + +&external_label("OPENSSL_ia32cap_P") if ($xmm); + + $A="eax"; $B="ebx"; $C="ecx"; @@ -47,6 +129,10 @@ $tmp1="ebp"; @V=($A,$B,$C,$D,$E,$T); +$alt=0; # 1 denotes alternative IALU implementation, which performs + # 8% *worse* on P4, same on Westmere and Atom, 2% better on + # Sandy Bridge... + sub BODY_00_15 { local($n,$a,$b,$c,$d,$e,$f)=@_; @@ -59,16 +145,18 @@ sub BODY_00_15 &rotl($tmp1,5); # tmp1=ROTATE(a,5) &xor($f,$d); &add($tmp1,$e); # tmp1+=e; - &and($f,$b); - &mov($e,&swtmp($n%16)); # e becomes volatile and is loaded + &mov($e,&swtmp($n%16)); # e becomes volatile and is loaded # with xi, also note that e becomes # f in next round... - &xor($f,$d); # f holds F_00_19(b,c,d) + &and($f,$b); &rotr($b,2); # b=ROTATE(b,30) - &lea($tmp1,&DWP(0x5a827999,$tmp1,$e)); # tmp1+=K_00_19+xi + &xor($f,$d); # f holds F_00_19(b,c,d) + &lea($tmp1,&DWP(0x5a827999,$tmp1,$e)); # tmp1+=K_00_19+xi - if ($n==15) { &add($f,$tmp1); } # f+=tmp1 + if ($n==15) { &mov($e,&swtmp(($n+1)%16));# pre-fetch f for next round + &add($f,$tmp1); } # f+=tmp1 else { &add($tmp1,$f); } # f becomes a in next round + &mov($tmp1,$a) if ($alt && $n==15); } sub BODY_16_19 @@ -77,22 +165,41 @@ sub BODY_16_19 &comment("16_19 $n"); - &mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d) - &xor($f,&swtmp(($n+2)%16)); - &xor($tmp1,$d); - &xor($f,&swtmp(($n+8)%16)); - &and($tmp1,$b); # tmp1 holds F_00_19(b,c,d) - &rotr($b,2); # b=ROTATE(b,30) +if ($alt) { + &xor($c,$d); + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &and($tmp1,$c); # tmp1 to hold F_00_19(b,c,d), b&=c^d + &xor($f,&swtmp(($n+8)%16)); + &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) + &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd + &rotl($f,1); # f=ROTATE(f,1) + &add($e,$tmp1); # e+=F_00_19(b,c,d) + &xor($c,$d); # restore $c + &mov($tmp1,$a); # b in next round + &rotr($b,$n==16?2:7); # b=ROTATE(b,30) + &mov(&swtmp($n%16),$f); # xi=f + &rotl($a,5); # ROTATE(a,5) + &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e + &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round + &add($f,$a); # f+=ROTATE(a,5) +} else { + &mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d) + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &xor($tmp1,$d); + &xor($f,&swtmp(($n+8)%16)); + &and($tmp1,$b); &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd &rotl($f,1); # f=ROTATE(f,1) &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) - &mov(&swtmp($n%16),$f); # xi=f - &lea($f,&DWP(0x5a827999,$f,$e));# f+=K_00_19+e - &mov($e,$a); # e becomes volatile - &rotl($e,5); # e=ROTATE(a,5) - &add($f,$tmp1); # f+=F_00_19(b,c,d) - &add($f,$e); # f+=ROTATE(a,5) + &add($e,$tmp1); # e+=F_00_19(b,c,d) + &mov($tmp1,$a); + &rotr($b,2); # b=ROTATE(b,30) + &mov(&swtmp($n%16),$f); # xi=f + &rotl($tmp1,5); # ROTATE(a,5) + &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e + &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round + &add($f,$tmp1); # f+=ROTATE(a,5) +} } sub BODY_20_39 @@ -102,21 +209,41 @@ sub BODY_20_39 &comment("20_39 $n"); +if ($alt) { + &xor($tmp1,$c); # tmp1 to hold F_20_39(b,c,d), b^=c + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) + &xor($f,&swtmp(($n+8)%16)); + &add($e,$tmp1); # e+=F_20_39(b,c,d) + &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd + &rotl($f,1); # f=ROTATE(f,1) + &mov($tmp1,$a); # b in next round + &rotr($b,7); # b=ROTATE(b,30) + &mov(&swtmp($n%16),$f) if($n<77);# xi=f + &rotl($a,5); # ROTATE(a,5) + &xor($b,$c) if($n==39);# warm up for BODY_40_59 + &and($tmp1,$b) if($n==39); + &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY + &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round + &add($f,$a); # f+=ROTATE(a,5) + &rotr($a,5) if ($n==79); +} else { &mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d) - &mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &rotr($b,2); # b=ROTATE(b,30) - &xor($f,&swtmp(($n+2)%16)); + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) &xor($tmp1,$c); &xor($f,&swtmp(($n+8)%16)); &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd &rotl($f,1); # f=ROTATE(f,1) - &add($tmp1,$e); - &mov(&swtmp($n%16),$f); # xi=f - &mov($e,$a); # e becomes volatile - &rotl($e,5); # e=ROTATE(a,5) - &lea($f,&DWP($K,$f,$tmp1)); # f+=K_20_39+e - &add($f,$e); # f+=ROTATE(a,5) + &add($e,$tmp1); # e+=F_20_39(b,c,d) + &rotr($b,2); # b=ROTATE(b,30) + &mov($tmp1,$a); + &rotl($tmp1,5); # ROTATE(a,5) + &mov(&swtmp($n%16),$f) if($n<77);# xi=f + &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY + &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round + &add($f,$tmp1); # f+=ROTATE(a,5) +} } sub BODY_40_59 @@ -125,41 +252,86 @@ sub BODY_40_59 &comment("40_59 $n"); - &mov($f,&swtmp($n%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &mov($tmp1,&swtmp(($n+2)%16)); - &xor($f,$tmp1); - &mov($tmp1,&swtmp(($n+8)%16)); - &xor($f,$tmp1); - &mov($tmp1,&swtmp(($n+13)%16)); - &xor($f,$tmp1); # f holds xa^xb^xc^xd - &mov($tmp1,$b); # tmp1 to hold F_40_59(b,c,d) +if ($alt) { + &add($e,$tmp1); # e+=b&(c^d) + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &mov($tmp1,$d); + &xor($f,&swtmp(($n+8)%16)); + &xor($c,$d); # restore $c + &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd &rotl($f,1); # f=ROTATE(f,1) - &or($tmp1,$c); - &mov(&swtmp($n%16),$f); # xi=f - &and($tmp1,$d); - &lea($f,&DWP(0x8f1bbcdc,$f,$e));# f+=K_40_59+e - &mov($e,$b); # e becomes volatile and is used - # to calculate F_40_59(b,c,d) + &and($tmp1,$c); + &rotr($b,7); # b=ROTATE(b,30) + &add($e,$tmp1); # e+=c&d + &mov($tmp1,$a); # b in next round + &mov(&swtmp($n%16),$f); # xi=f + &rotl($a,5); # ROTATE(a,5) + &xor($b,$c) if ($n<59); + &and($tmp1,$b) if ($n<59);# tmp1 to hold F_40_59(b,c,d) + &lea($f,&DWP(0x8f1bbcdc,$f,$e));# f+=K_40_59+e+(b&(c^d)) + &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round + &add($f,$a); # f+=ROTATE(a,5) +} else { + &mov($tmp1,$c); # tmp1 to hold F_40_59(b,c,d) + &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &xor($tmp1,$d); + &xor($f,&swtmp(($n+8)%16)); + &and($tmp1,$b); + &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd + &rotl($f,1); # f=ROTATE(f,1) + &add($tmp1,$e); # b&(c^d)+=e &rotr($b,2); # b=ROTATE(b,30) - &and($e,$c); - &or($tmp1,$e); # tmp1 holds F_40_59(b,c,d) - &mov($e,$a); - &rotl($e,5); # e=ROTATE(a,5) - &add($f,$tmp1); # f+=tmp1; + &mov($e,$a); # e becomes volatile + &rotl($e,5); # ROTATE(a,5) + &mov(&swtmp($n%16),$f); # xi=f + &lea($f,&DWP(0x8f1bbcdc,$f,$tmp1));# f+=K_40_59+e+(b&(c^d)) + &mov($tmp1,$c); &add($f,$e); # f+=ROTATE(a,5) + &and($tmp1,$d); + &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round + &add($f,$tmp1); # f+=c&d +} } &function_begin("sha1_block_data_order"); +if ($xmm) { + &static_label("ssse3_shortcut"); + &static_label("avx_shortcut") if ($ymm); + &static_label("K_XX_XX"); + + &call (&label("pic_point")); # make it PIC! + &set_label("pic_point"); + &blindpop($tmp1); + &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); + &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); + + &mov ($A,&DWP(0,$T)); + &mov ($D,&DWP(4,$T)); + &test ($D,1<<9); # check SSSE3 bit + &jz (&label("x86")); + &test ($A,1<<24); # check FXSR bit + &jz (&label("x86")); + if ($ymm) { + &and ($D,1<<28); # mask AVX bit + &and ($A,1<<30); # mask "Intel CPU" bit + &or ($A,$D); + &cmp ($A,1<<28|1<<30); + &je (&label("avx_shortcut")); + } + &jmp (&label("ssse3_shortcut")); + &set_label("x86",16); +} &mov($tmp1,&wparam(0)); # SHA_CTX *c &mov($T,&wparam(1)); # const void *input &mov($A,&wparam(2)); # size_t num - &stack_push(16); # allocate X[16] + &stack_push(16+3); # allocate X[16] &shl($A,6); &add($A,$T); &mov(&wparam(2),$A); # pointer beyond the end of input &mov($E,&DWP(16,$tmp1));# pre-load E + &jmp(&label("loop")); - &set_label("loop",16); +&set_label("loop",16); # copy input chunk to X, but reversing byte order! for ($i=0; $i<16; $i+=4) @@ -213,8 +385,845 @@ sub BODY_40_59 &mov(&DWP(16,$tmp1),$C); &jb(&label("loop")); - &stack_pop(16); + &stack_pop(16+3); &function_end("sha1_block_data_order"); + +if ($xmm) { +###################################################################### +# The SSSE3 implementation. +# +# %xmm[0-7] are used as ring @X[] buffer containing quadruples of last +# 32 elements of the message schedule or Xupdate outputs. First 4 +# quadruples are simply byte-swapped input, next 4 are calculated +# according to method originally suggested by Dean Gaudet (modulo +# being implemented in SSSE3). Once 8 quadruples or 32 elements are +# collected, it switches to routine proposed by Max Locktyukhin. +# +# Calculations inevitably require temporary reqisters, and there are +# no %xmm registers left to spare. For this reason part of the ring +# buffer, X[2..4] to be specific, is offloaded to 3 quadriples ring +# buffer on the stack. Keep in mind that X[2] is alias X[-6], X[3] - +# X[-5], and X[4] - X[-4]... +# +# Another notable optimization is aggressive stack frame compression +# aiming to minimize amount of 9-byte instructions... +# +# Yet another notable optimization is "jumping" $B variable. It means +# that there is no register permanently allocated for $B value. This +# allowed to eliminate one instruction from body_20_39... +# +my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded +my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 +my @V=($A,$B,$C,$D,$E); +my $j=0; # hash round +my @T=($T,$tmp1); +my $inp; + +my $_rol=sub { &rol(@_) }; +my $_ror=sub { &ror(@_) }; + +&function_begin("_sha1_block_data_order_ssse3"); + &call (&label("pic_point")); # make it PIC! + &set_label("pic_point"); + &blindpop($tmp1); + &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); +&set_label("ssse3_shortcut"); + + &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 + &movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39 + &movdqa (@X[5],&QWP(32,$tmp1)); # K_40_59 + &movdqa (@X[6],&QWP(48,$tmp1)); # K_60_79 + &movdqa (@X[2],&QWP(64,$tmp1)); # pbswap mask + + &mov ($E,&wparam(0)); # load argument block + &mov ($inp=@T[1],&wparam(1)); + &mov ($D,&wparam(2)); + &mov (@T[0],"esp"); + + # stack frame layout + # + # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area + # X[4]+K X[5]+K X[6]+K X[7]+K + # X[8]+K X[9]+K X[10]+K X[11]+K + # X[12]+K X[13]+K X[14]+K X[15]+K + # + # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area + # X[4] X[5] X[6] X[7] + # X[8] X[9] X[10] X[11] # even borrowed for K_00_19 + # + # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants + # K_40_59 K_40_59 K_40_59 K_40_59 + # K_60_79 K_60_79 K_60_79 K_60_79 + # K_00_19 K_00_19 K_00_19 K_00_19 + # pbswap mask + # + # +192 ctx # argument block + # +196 inp + # +200 end + # +204 esp + &sub ("esp",208); + &and ("esp",-64); + + &movdqa (&QWP(112+0,"esp"),@X[4]); # copy constants + &movdqa (&QWP(112+16,"esp"),@X[5]); + &movdqa (&QWP(112+32,"esp"),@X[6]); + &shl ($D,6); # len*64 + &movdqa (&QWP(112+48,"esp"),@X[3]); + &add ($D,$inp); # end of input + &movdqa (&QWP(112+64,"esp"),@X[2]); + &add ($inp,64); + &mov (&DWP(192+0,"esp"),$E); # save argument block + &mov (&DWP(192+4,"esp"),$inp); + &mov (&DWP(192+8,"esp"),$D); + &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp + + &mov ($A,&DWP(0,$E)); # load context + &mov ($B,&DWP(4,$E)); + &mov ($C,&DWP(8,$E)); + &mov ($D,&DWP(12,$E)); + &mov ($E,&DWP(16,$E)); + &mov (@T[0],$B); # magic seed + + &movdqu (@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3] + &movdqu (@X[-3&7],&QWP(-48,$inp)); + &movdqu (@X[-2&7],&QWP(-32,$inp)); + &movdqu (@X[-1&7],&QWP(-16,$inp)); + &pshufb (@X[-4&7],@X[2]); # byte swap + &pshufb (@X[-3&7],@X[2]); + &pshufb (@X[-2&7],@X[2]); + &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot + &pshufb (@X[-1&7],@X[2]); + &paddd (@X[-4&7],@X[3]); # add K_00_19 + &paddd (@X[-3&7],@X[3]); + &paddd (@X[-2&7],@X[3]); + &movdqa (&QWP(0,"esp"),@X[-4&7]); # X[]+K xfer to IALU + &psubd (@X[-4&7],@X[3]); # restore X[] + &movdqa (&QWP(0+16,"esp"),@X[-3&7]); + &psubd (@X[-3&7],@X[3]); + &movdqa (&QWP(0+32,"esp"),@X[-2&7]); + &psubd (@X[-2&7],@X[3]); + &movdqa (@X[0],@X[-3&7]); + &jmp (&label("loop")); + +###################################################################### +# SSE instruction sequence is first broken to groups of indepentent +# instructions, independent in respect to their inputs and shifter +# (not all architectures have more than one). Then IALU instructions +# are "knitted in" between the SSE groups. Distance is maintained for +# SSE latency of 2 in hope that it fits better upcoming AMD Bulldozer +# [which allegedly also implements SSSE3]... +# +# Temporary registers usage. X[2] is volatile at the entry and at the +# end is restored from backtrace ring buffer. X[3] is expected to +# contain current K_XX_XX constant and is used to caclulate X[-1]+K +# from previous round, it becomes volatile the moment the value is +# saved to stack for transfer to IALU. X[4] becomes volatile whenever +# X[-4] is accumulated and offloaded to backtrace ring buffer, at the +# end it is loaded with next K_XX_XX [which becomes X[3] in next +# round]... +# +sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + &movdqa (@X[2],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + + &paddd (@X[3],@X[-1&7]); + &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + &psrldq (@X[2],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[2],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (@X[4],@X[0]); + &movdqa (@X[2],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pslldq (@X[4],12); # "X[0]"<<96, extract one dword + &paddd (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@X[2],31); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@X[3],@X[4]); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@X[4],30); + &por (@X[0],@X[2]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + + &pslld (@X[3],2); + &pxor (@X[0],@X[4]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@X[3]); # "X[0]"^=("X[0]"<<96)<<<2 + &movdqa (@X[1],@X[-2&7]) if ($Xi<7); + eval(shift(@insns)); + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xupdate_ssse3_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &movdqa (@X[2],@X[-1&7]) if ($Xi==8); + eval(shift(@insns)); # body_20_39 + &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + &palignr(@X[2],@X[-2&7],8); # compose "X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + if ($Xi%5) { + &movdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX... + } else { # ... or load next one + &movdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp")); + } + &paddd (@X[3],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pxor (@X[0],@X[2]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &movdqa (@X[2],@X[0]); + &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pslld (@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &psrld (@X[2],30); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &por (@X[0],@X[2]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + &movdqa (@X[3],@X[0]) if ($Xi<19); + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xuplast_ssse3_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &paddd (@X[3],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &mov ($inp=@T[1],&DWP(192+4,"esp")); + &cmp ($inp,&DWP(192+8,"esp")); + &je (&label("done")); + + &movdqa (@X[3],&QWP(112+48,"esp")); # K_00_19 + &movdqa (@X[2],&QWP(112+64,"esp")); # pbswap mask + &movdqu (@X[-4&7],&QWP(0,$inp)); # load input + &movdqu (@X[-3&7],&QWP(16,$inp)); + &movdqu (@X[-2&7],&QWP(32,$inp)); + &movdqu (@X[-1&7],&QWP(48,$inp)); + &add ($inp,64); + &pshufb (@X[-4&7],@X[2]); # byte swap + &mov (&DWP(192+4,"esp"),$inp); + &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot + + $Xi=0; +} + +sub Xloop_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &pshufb (@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[($Xi-4)&7],@X[3]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (&QWP(0+16*$Xi,"esp"),@X[($Xi-4)&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + &psubd (@X[($Xi-4)&7],@X[3]); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +sub body_00_19 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer + '&xor ($c,$d);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&xor ($c,$d);', # restore $c + '&xor (@T[0],$d);', + '&add ($e,$a);', + '&$_ror ($b,$j?7:2);', # $b>>>2 + '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub body_20_39 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer + '&xor (@T[0],$d);', # ($b^$d) + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&xor (@T[0],$c);', # ($b^$d^$c) + '&add ($e,$a);', + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub body_40_59 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&mov (@T[1],$c);', + '&xor ($c,$d);', + '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer + '&and (@T[1],$d);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[1]);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&add ($e,@T[0]);', + '&xor ($c,$d);', # restore $c + '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +&set_label("loop",16); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + + &mov (@T[1],&DWP(192,"esp")); # update context + &add ($A,&DWP(0,@T[1])); + &add (@T[0],&DWP(4,@T[1])); # $b + &add ($C,&DWP(8,@T[1])); + &mov (&DWP(0,@T[1]),$A); + &add ($D,&DWP(12,@T[1])); + &mov (&DWP(4,@T[1]),@T[0]); + &add ($E,&DWP(16,@T[1])); + &mov (&DWP(8,@T[1]),$C); + &mov ($B,@T[0]); + &mov (&DWP(12,@T[1]),$D); + &mov (&DWP(16,@T[1]),$E); + &movdqa (@X[0],@X[-3&7]); + + &jmp (&label("loop")); + +&set_label("done",16); $j=$saved_j; @V=@saved_V; + + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + + &mov (@T[1],&DWP(192,"esp")); # update context + &add ($A,&DWP(0,@T[1])); + &mov ("esp",&DWP(192+12,"esp")); # restore %esp + &add (@T[0],&DWP(4,@T[1])); # $b + &add ($C,&DWP(8,@T[1])); + &mov (&DWP(0,@T[1]),$A); + &add ($D,&DWP(12,@T[1])); + &mov (&DWP(4,@T[1]),@T[0]); + &add ($E,&DWP(16,@T[1])); + &mov (&DWP(8,@T[1]),$C); + &mov (&DWP(12,@T[1]),$D); + &mov (&DWP(16,@T[1]),$E); + +&function_end("_sha1_block_data_order_ssse3"); + +if ($ymm) { +my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded +my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 +my @V=($A,$B,$C,$D,$E); +my $j=0; # hash round +my @T=($T,$tmp1); +my $inp; + +my $_rol=sub { &shld(@_[0],@_) }; +my $_ror=sub { &shrd(@_[0],@_) }; + +&function_begin("_sha1_block_data_order_avx"); + &call (&label("pic_point")); # make it PIC! + &set_label("pic_point"); + &blindpop($tmp1); + &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); +&set_label("avx_shortcut"); + &vzeroall(); + + &vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19 + &vmovdqa(@X[4],&QWP(16,$tmp1)); # K_20_39 + &vmovdqa(@X[5],&QWP(32,$tmp1)); # K_40_59 + &vmovdqa(@X[6],&QWP(48,$tmp1)); # K_60_79 + &vmovdqa(@X[2],&QWP(64,$tmp1)); # pbswap mask + + &mov ($E,&wparam(0)); # load argument block + &mov ($inp=@T[1],&wparam(1)); + &mov ($D,&wparam(2)); + &mov (@T[0],"esp"); + + # stack frame layout + # + # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area + # X[4]+K X[5]+K X[6]+K X[7]+K + # X[8]+K X[9]+K X[10]+K X[11]+K + # X[12]+K X[13]+K X[14]+K X[15]+K + # + # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area + # X[4] X[5] X[6] X[7] + # X[8] X[9] X[10] X[11] # even borrowed for K_00_19 + # + # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants + # K_40_59 K_40_59 K_40_59 K_40_59 + # K_60_79 K_60_79 K_60_79 K_60_79 + # K_00_19 K_00_19 K_00_19 K_00_19 + # pbswap mask + # + # +192 ctx # argument block + # +196 inp + # +200 end + # +204 esp + &sub ("esp",208); + &and ("esp",-64); + + &vmovdqa(&QWP(112+0,"esp"),@X[4]); # copy constants + &vmovdqa(&QWP(112+16,"esp"),@X[5]); + &vmovdqa(&QWP(112+32,"esp"),@X[6]); + &shl ($D,6); # len*64 + &vmovdqa(&QWP(112+48,"esp"),@X[3]); + &add ($D,$inp); # end of input + &vmovdqa(&QWP(112+64,"esp"),@X[2]); + &add ($inp,64); + &mov (&DWP(192+0,"esp"),$E); # save argument block + &mov (&DWP(192+4,"esp"),$inp); + &mov (&DWP(192+8,"esp"),$D); + &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp + + &mov ($A,&DWP(0,$E)); # load context + &mov ($B,&DWP(4,$E)); + &mov ($C,&DWP(8,$E)); + &mov ($D,&DWP(12,$E)); + &mov ($E,&DWP(16,$E)); + &mov (@T[0],$B); # magic seed + + &vmovdqu(@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3] + &vmovdqu(@X[-3&7],&QWP(-48,$inp)); + &vmovdqu(@X[-2&7],&QWP(-32,$inp)); + &vmovdqu(@X[-1&7],&QWP(-16,$inp)); + &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap + &vpshufb(@X[-3&7],@X[-3&7],@X[2]); + &vpshufb(@X[-2&7],@X[-2&7],@X[2]); + &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot + &vpshufb(@X[-1&7],@X[-1&7],@X[2]); + &vpaddd (@X[0],@X[-4&7],@X[3]); # add K_00_19 + &vpaddd (@X[1],@X[-3&7],@X[3]); + &vpaddd (@X[2],@X[-2&7],@X[3]); + &vmovdqa(&QWP(0,"esp"),@X[0]); # X[]+K xfer to IALU + &vmovdqa(&QWP(0+16,"esp"),@X[1]); + &vmovdqa(&QWP(0+32,"esp"),@X[2]); + &jmp (&label("loop")); + +sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpaddd (@X[3],@X[3],@X[-1&7]); + &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrldq(@X[2],@X[-1&7],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[2],@X[2],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@X[2],@X[0],31); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslldq(@X[4],@X[0],12); # "X[0]"<<96, extract one dword + &vpaddd (@X[0],@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@X[3],@X[4],30); + &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslld (@X[4],@X[4],2); + &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor (@X[0],@X[0],@X[3]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@X[4]); # "X[0]"^=("X[0]"<<96)<<<2 + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xupdate_avx_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &vpalignr(@X[2],@X[-1&7],@X[-2&7],8); # compose "X[-6]" + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); + if ($Xi%5) { + &vmovdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX... + } else { # ... or load next one + &vmovdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp")); + } + &vpaddd (@X[3],@X[3],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpsrld (@X[2],@X[0],30); + &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpslld (@X[0],@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xuplast_avx_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &vpaddd (@X[3],@X[3],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &mov ($inp=@T[1],&DWP(192+4,"esp")); + &cmp ($inp,&DWP(192+8,"esp")); + &je (&label("done")); + + &vmovdqa(@X[3],&QWP(112+48,"esp")); # K_00_19 + &vmovdqa(@X[2],&QWP(112+64,"esp")); # pbswap mask + &vmovdqu(@X[-4&7],&QWP(0,$inp)); # load input + &vmovdqu(@X[-3&7],&QWP(16,$inp)); + &vmovdqu(@X[-2&7],&QWP(32,$inp)); + &vmovdqu(@X[-1&7],&QWP(48,$inp)); + &add ($inp,64); + &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap + &mov (&DWP(192+4,"esp"),$inp); + &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot + + $Xi=0; +} + +sub Xloop_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpshufb (@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@X[3]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (&QWP(0+16*$Xi,"esp"),@X[$Xi&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +&set_label("loop",16); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_32_79(\&body_00_19); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_20_39); + &Xuplast_avx_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + + &mov (@T[1],&DWP(192,"esp")); # update context + &add ($A,&DWP(0,@T[1])); + &add (@T[0],&DWP(4,@T[1])); # $b + &add ($C,&DWP(8,@T[1])); + &mov (&DWP(0,@T[1]),$A); + &add ($D,&DWP(12,@T[1])); + &mov (&DWP(4,@T[1]),@T[0]); + &add ($E,&DWP(16,@T[1])); + &mov (&DWP(8,@T[1]),$C); + &mov ($B,@T[0]); + &mov (&DWP(12,@T[1]),$D); + &mov (&DWP(16,@T[1]),$E); + + &jmp (&label("loop")); + +&set_label("done",16); $j=$saved_j; @V=@saved_V; + + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + + &vzeroall(); + + &mov (@T[1],&DWP(192,"esp")); # update context + &add ($A,&DWP(0,@T[1])); + &mov ("esp",&DWP(192+12,"esp")); # restore %esp + &add (@T[0],&DWP(4,@T[1])); # $b + &add ($C,&DWP(8,@T[1])); + &mov (&DWP(0,@T[1]),$A); + &add ($D,&DWP(12,@T[1])); + &mov (&DWP(4,@T[1]),@T[0]); + &add ($E,&DWP(16,@T[1])); + &mov (&DWP(8,@T[1]),$C); + &mov (&DWP(12,@T[1]),$D); + &mov (&DWP(16,@T[1]),$E); +&function_end("_sha1_block_data_order_avx"); +} +&set_label("K_XX_XX",64); +&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 +&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 +&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 +&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 +&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask +} &asciz("SHA1 block transform for x86, CRYPTOGAMS by "); &asm_finish(); diff --git a/app/openssl/crypto/sha/asm/sha1-alpha.pl b/app/openssl/crypto/sha/asm/sha1-alpha.pl new file mode 100644 index 00000000..6c4b9251 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-alpha.pl @@ -0,0 +1,322 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA1 block procedure for Alpha. + +# On 21264 performance is 33% better than code generated by vendor +# compiler, and 75% better than GCC [3.4], and in absolute terms is +# 8.7 cycles per processed byte. Implementation features vectorized +# byte swap, but not Xupdate. + +@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", + "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); +$ctx="a0"; # $16 +$inp="a1"; +$num="a2"; +$A="a3"; +$B="a4"; # 20 +$C="a5"; +$D="t8"; +$E="t9"; @V=($A,$B,$C,$D,$E); +$t0="t10"; # 24 +$t1="t11"; +$t2="ra"; +$t3="t12"; +$K="AT"; # 28 + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i==0); + ldq_u @X[0],0+0($inp) + ldq_u @X[1],0+7($inp) +___ +$code.=<<___ if (!($i&1) && $i<14); + ldq_u @X[$i+2],($i+2)*4+0($inp) + ldq_u @X[$i+3],($i+2)*4+7($inp) +___ +$code.=<<___ if (!($i&1) && $i<15); + extql @X[$i],$inp,@X[$i] + extqh @X[$i+1],$inp,@X[$i+1] + + or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched + + srl @X[$i],24,$t0 # vectorized byte swap + srl @X[$i],8,$t2 + + sll @X[$i],8,$t3 + sll @X[$i],24,@X[$i] + zapnot $t0,0x11,$t0 + zapnot $t2,0x22,$t2 + + zapnot @X[$i],0x88,@X[$i] + or $t0,$t2,$t0 + zapnot $t3,0x44,$t3 + sll $a,5,$t1 + + or @X[$i],$t0,@X[$i] + addl $K,$e,$e + and $b,$c,$t2 + zapnot $a,0xf,$a + + or @X[$i],$t3,@X[$i] + srl $a,27,$t0 + bic $d,$b,$t3 + sll $b,30,$b + + extll @X[$i],4,@X[$i+1] # extract upper half + or $t2,$t3,$t2 + addl @X[$i],$e,$e + + addl $t1,$e,$e + srl $b,32,$t3 + zapnot @X[$i],0xf,@X[$i] + + addl $t0,$e,$e + addl $t2,$e,$e + or $t3,$b,$b +___ +$code.=<<___ if (($i&1) && $i<15); + sll $a,5,$t1 + addl $K,$e,$e + and $b,$c,$t2 + zapnot $a,0xf,$a + + srl $a,27,$t0 + addl @X[$i%16],$e,$e + bic $d,$b,$t3 + sll $b,30,$b + + or $t2,$t3,$t2 + addl $t1,$e,$e + srl $b,32,$t3 + zapnot @X[$i],0xf,@X[$i] + + addl $t0,$e,$e + addl $t2,$e,$e + or $t3,$b,$b +___ +$code.=<<___ if ($i>=15); # with forward Xupdate + sll $a,5,$t1 + addl $K,$e,$e + and $b,$c,$t2 + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + + zapnot $a,0xf,$a + addl @X[$i%16],$e,$e + bic $d,$b,$t3 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + + srl $a,27,$t0 + addl $t1,$e,$e + or $t2,$t3,$t2 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + + sll $b,30,$b + addl $t0,$e,$e + srl @X[$j%16],31,$t1 + + addl $t2,$e,$e + srl $b,32,$t3 + addl @X[$j%16],@X[$j%16],@X[$j%16] + + or $t3,$b,$b + zapnot @X[$i%16],0xf,@X[$i%16] + or $t1,@X[$j%16],@X[$j%16] +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); # with forward Xupdate + sll $a,5,$t1 + addl $K,$e,$e + zapnot $a,0xf,$a + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + + sll $b,30,$t3 + addl $t1,$e,$e + xor $b,$c,$t2 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + + srl $b,2,$b + addl @X[$i%16],$e,$e + xor $d,$t2,$t2 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + + srl @X[$j%16],31,$t1 + addl $t2,$e,$e + srl $a,27,$t0 + addl @X[$j%16],@X[$j%16],@X[$j%16] + + or $t3,$b,$b + addl $t0,$e,$e + or $t1,@X[$j%16],@X[$j%16] +___ +$code.=<<___ if ($i<77); + zapnot @X[$i%16],0xf,@X[$i%16] +___ +$code.=<<___ if ($i==79); # with context fetch + sll $a,5,$t1 + addl $K,$e,$e + zapnot $a,0xf,$a + ldl @X[0],0($ctx) + + sll $b,30,$t3 + addl $t1,$e,$e + xor $b,$c,$t2 + ldl @X[1],4($ctx) + + srl $b,2,$b + addl @X[$i%16],$e,$e + xor $d,$t2,$t2 + ldl @X[2],8($ctx) + + srl $a,27,$t0 + addl $t2,$e,$e + ldl @X[3],12($ctx) + + or $t3,$b,$b + addl $t0,$e,$e + ldl @X[4],16($ctx) +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___; # with forward Xupdate + sll $a,5,$t1 + addl $K,$e,$e + zapnot $a,0xf,$a + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + + srl $a,27,$t0 + and $b,$c,$t2 + and $b,$d,$t3 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + + sll $b,30,$b + addl $t1,$e,$e + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + + srl @X[$j%16],31,$t1 + addl $t0,$e,$e + or $t2,$t3,$t2 + and $c,$d,$t3 + + or $t2,$t3,$t2 + srl $b,32,$t3 + addl @X[$i%16],$e,$e + addl @X[$j%16],@X[$j%16],@X[$j%16] + + or $t3,$b,$b + addl $t2,$e,$e + or $t1,@X[$j%16],@X[$j%16] + zapnot @X[$i%16],0xf,@X[$i%16] +___ +} + +$code=<<___; +#ifdef __linux__ +#include +#else +#include +#include +#endif + +.text + +.set noat +.set noreorder +.globl sha1_block_data_order +.align 5 +.ent sha1_block_data_order +sha1_block_data_order: + lda sp,-64(sp) + stq ra,0(sp) + stq s0,8(sp) + stq s1,16(sp) + stq s2,24(sp) + stq s3,32(sp) + stq s4,40(sp) + stq s5,48(sp) + stq fp,56(sp) + .mask 0x0400fe00,-64 + .frame sp,64,ra + .prologue 0 + + ldl $A,0($ctx) + ldl $B,4($ctx) + sll $num,6,$num + ldl $C,8($ctx) + ldl $D,12($ctx) + ldl $E,16($ctx) + addq $inp,$num,$num + +.Lloop: + .set noreorder + ldah $K,23170(zero) + zapnot $B,0xf,$B + lda $K,31129($K) # K_00_19 +___ +for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + ldah $K,28378(zero) + lda $K,-5215($K) # K_20_39 +___ +for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + ldah $K,-28900(zero) + lda $K,-17188($K) # K_40_59 +___ +for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + ldah $K,-13725(zero) + lda $K,-15914($K) # K_60_79 +___ +for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + addl @X[0],$A,$A + addl @X[1],$B,$B + addl @X[2],$C,$C + addl @X[3],$D,$D + addl @X[4],$E,$E + stl $A,0($ctx) + stl $B,4($ctx) + addq $inp,64,$inp + stl $C,8($ctx) + stl $D,12($ctx) + stl $E,16($ctx) + cmpult $inp,$num,$t1 + bne $t1,.Lloop + + .set noreorder + ldq ra,0(sp) + ldq s0,8(sp) + ldq s1,16(sp) + ldq s2,24(sp) + ldq s3,32(sp) + ldq s4,40(sp) + ldq s5,48(sp) + ldq fp,56(sp) + lda sp,64(sp) + ret (ra) +.end sha1_block_data_order +.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by " +.align 2 +___ +$output=shift and open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.S b/app/openssl/crypto/sha/asm/sha1-armv4-large.S new file mode 120000 index 00000000..6523cbdd --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.S @@ -0,0 +1 @@ +sha1-armv4-large.s \ No newline at end of file diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl index 79e3f613..33da3e0e 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl @@ -47,6 +47,10 @@ # Cortex A8 core and in absolute terms ~870 cycles per input block # [or 13.6 cycles per byte]. +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 10% +# improvement on Cortex A8 core and 12.2 cycles per byte. while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -76,31 +80,41 @@ $code.=<<___; add $e,$K,$e,ror#2 @ E+=K_xx_xx ldr $t3,[$Xi,#2*4] eor $t0,$t0,$t1 - eor $t2,$t2,$t3 + eor $t2,$t2,$t3 @ 1 cycle stall eor $t1,$c,$d @ F_xx_xx mov $t0,$t0,ror#31 add $e,$e,$a,ror#27 @ E+=ROR(A,27) eor $t0,$t0,$t2,ror#31 + str $t0,[$Xi,#-4]! $opt1 @ F_xx_xx $opt2 @ F_xx_xx add $e,$e,$t0 @ E+=X[i] - str $t0,[$Xi,#-4]! ___ } sub BODY_00_15 { my ($a,$b,$c,$d,$e)=@_; $code.=<<___; - ldrb $t0,[$inp],#4 - ldrb $t1,[$inp,#-1] - ldrb $t2,[$inp,#-2] +#if __ARM_ARCH__<7 + ldrb $t1,[$inp,#2] + ldrb $t0,[$inp,#3] + ldrb $t2,[$inp,#1] add $e,$K,$e,ror#2 @ E+=K_00_19 - ldrb $t3,[$inp,#-3] + ldrb $t3,[$inp],#4 + orr $t0,$t0,$t1,lsl#8 + eor $t1,$c,$d @ F_xx_xx + orr $t0,$t0,$t2,lsl#16 add $e,$e,$a,ror#27 @ E+=ROR(A,27) - orr $t0,$t1,$t0,lsl#24 + orr $t0,$t0,$t3,lsl#24 +#else + ldr $t0,[$inp],#4 @ handles unaligned + add $e,$K,$e,ror#2 @ E+=K_00_19 eor $t1,$c,$d @ F_xx_xx - orr $t0,$t0,$t2,lsl#8 - orr $t0,$t0,$t3,lsl#16 + add $e,$e,$a,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev $t0,$t0 @ byte swap +#endif +#endif and $t1,$b,$t1,ror#2 add $e,$e,$t0 @ E+=X[i] eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) @@ -136,6 +150,8 @@ ___ } $code=<<___; +#include "arm_arch.h" + .text .global sha1_block_data_order @@ -161,7 +177,7 @@ for($i=0;$i<5;$i++) { $code.=<<___; teq $Xi,sp bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#5*4 + sub sp,sp,#25*4 ___ &BODY_00_15(@V); unshift(@V,pop(@V)); &BODY_16_19(@V); unshift(@V,pop(@V)); @@ -171,7 +187,6 @@ ___ $code.=<<___; ldr $K,.LK_20_39 @ [+15+16*4] - sub sp,sp,#20*4 cmn sp,#0 @ [+3], clear carry to denote 20_39 .L_20_39_or_60_79: ___ @@ -210,10 +225,14 @@ $code.=<<___; teq $inp,$len bne .Lloop @ [+18], total 1307 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.s b/app/openssl/crypto/sha/asm/sha1-armv4-large.s index 7f687d9f..639ae78a 100644 --- a/app/openssl/crypto/sha/asm/sha1-armv4-large.s +++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.s @@ -1,3 +1,5 @@ +#include "arm_arch.h" + .text .global sha1_block_data_order @@ -16,76 +18,126 @@ sha1_block_data_order: mov r6,r6,ror#30 mov r7,r7,ror#30 @ [6] .L_00_15: - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r4,r10,ror#2 add r7,r7,r9 @ E+=X[i] eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r7,r7,r10 @ E+=F_00_19(B,C,D) - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r6,r8,r6,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r6,r6,r7,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 eor r10,r4,r5 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r3,r10,ror#2 add r6,r6,r9 @ E+=X[i] eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r6,r6,r10 @ E+=F_00_19(B,C,D) - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r5,r8,r5,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r5,r5,r6,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 eor r10,r3,r4 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r7,r10,ror#2 add r5,r5,r9 @ E+=X[i] eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r5,r5,r10 @ E+=F_00_19(B,C,D) - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r4,r8,r4,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r4,r4,r5,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 eor r10,r7,r3 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r6,r10,ror#2 add r4,r4,r9 @ E+=X[i] eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r4,r4,r10 @ E+=F_00_19(B,C,D) - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r3,r8,r3,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r3,r3,r4,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 eor r10,r6,r7 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r5,r10,ror#2 add r3,r3,r9 @ E+=X[i] eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) @@ -93,17 +145,27 @@ sha1_block_data_order: add r3,r3,r10 @ E+=F_00_19(B,C,D) teq r14,sp bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#5*4 - ldrb r9,[r1],#4 - ldrb r10,[r1,#-1] - ldrb r11,[r1,#-2] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1,#-3] + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r10,r9,lsl#24 + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#8 - orr r9,r9,r12,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif and r10,r4,r10,ror#2 add r7,r7,r9 @ E+=X[i] eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) @@ -115,15 +177,15 @@ sha1_block_data_order: add r6,r8,r6,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r4,r5 @ F_xx_xx mov r9,r9,ror#31 add r6,r6,r7,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r3,r10,ror#2 @ F_xx_xx @ F_xx_xx add r6,r6,r9 @ E+=X[i] - str r9,[r14,#-4]! eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) add r6,r6,r10 @ E+=F_00_19(B,C,D) ldr r9,[r14,#15*4] @@ -132,15 +194,15 @@ sha1_block_data_order: add r5,r8,r5,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r3,r4 @ F_xx_xx mov r9,r9,ror#31 add r5,r5,r6,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r7,r10,ror#2 @ F_xx_xx @ F_xx_xx add r5,r5,r9 @ E+=X[i] - str r9,[r14,#-4]! eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) add r5,r5,r10 @ E+=F_00_19(B,C,D) ldr r9,[r14,#15*4] @@ -149,15 +211,15 @@ sha1_block_data_order: add r4,r8,r4,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r7,r3 @ F_xx_xx mov r9,r9,ror#31 add r4,r4,r5,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r6,r10,ror#2 @ F_xx_xx @ F_xx_xx add r4,r4,r9 @ E+=X[i] - str r9,[r14,#-4]! eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) add r4,r4,r10 @ E+=F_00_19(B,C,D) ldr r9,[r14,#15*4] @@ -166,20 +228,19 @@ sha1_block_data_order: add r3,r8,r3,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r6,r7 @ F_xx_xx mov r9,r9,ror#31 add r3,r3,r4,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r5,r10,ror#2 @ F_xx_xx @ F_xx_xx add r3,r3,r9 @ E+=X[i] - str r9,[r14,#-4]! eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) add r3,r3,r10 @ E+=F_00_19(B,C,D) ldr r8,.LK_20_39 @ [+15+16*4] - sub sp,sp,#20*4 cmn sp,#0 @ [+3], clear carry to denote 20_39 .L_20_39_or_60_79: ldr r9,[r14,#15*4] @@ -188,15 +249,15 @@ sha1_block_data_order: add r7,r8,r7,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r5,r6 @ F_xx_xx mov r9,r9,ror#31 add r7,r7,r3,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! eor r10,r4,r10,ror#2 @ F_xx_xx @ F_xx_xx add r7,r7,r9 @ E+=X[i] - str r9,[r14,#-4]! add r7,r7,r10 @ E+=F_20_39(B,C,D) ldr r9,[r14,#15*4] ldr r10,[r14,#13*4] @@ -204,15 +265,15 @@ sha1_block_data_order: add r6,r8,r6,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r4,r5 @ F_xx_xx mov r9,r9,ror#31 add r6,r6,r7,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! eor r10,r3,r10,ror#2 @ F_xx_xx @ F_xx_xx add r6,r6,r9 @ E+=X[i] - str r9,[r14,#-4]! add r6,r6,r10 @ E+=F_20_39(B,C,D) ldr r9,[r14,#15*4] ldr r10,[r14,#13*4] @@ -220,15 +281,15 @@ sha1_block_data_order: add r5,r8,r5,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r3,r4 @ F_xx_xx mov r9,r9,ror#31 add r5,r5,r6,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! eor r10,r7,r10,ror#2 @ F_xx_xx @ F_xx_xx add r5,r5,r9 @ E+=X[i] - str r9,[r14,#-4]! add r5,r5,r10 @ E+=F_20_39(B,C,D) ldr r9,[r14,#15*4] ldr r10,[r14,#13*4] @@ -236,15 +297,15 @@ sha1_block_data_order: add r4,r8,r4,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r7,r3 @ F_xx_xx mov r9,r9,ror#31 add r4,r4,r5,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! eor r10,r6,r10,ror#2 @ F_xx_xx @ F_xx_xx add r4,r4,r9 @ E+=X[i] - str r9,[r14,#-4]! add r4,r4,r10 @ E+=F_20_39(B,C,D) ldr r9,[r14,#15*4] ldr r10,[r14,#13*4] @@ -252,15 +313,15 @@ sha1_block_data_order: add r3,r8,r3,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r6,r7 @ F_xx_xx mov r9,r9,ror#31 add r3,r3,r4,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! eor r10,r5,r10,ror#2 @ F_xx_xx @ F_xx_xx add r3,r3,r9 @ E+=X[i] - str r9,[r14,#-4]! add r3,r3,r10 @ E+=F_20_39(B,C,D) teq r14,sp @ preserve carry bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] @@ -275,15 +336,15 @@ sha1_block_data_order: add r7,r8,r7,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r5,r6 @ F_xx_xx mov r9,r9,ror#31 add r7,r7,r3,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r4,r10,ror#2 @ F_xx_xx and r11,r5,r6 @ F_xx_xx add r7,r7,r9 @ E+=X[i] - str r9,[r14,#-4]! add r7,r7,r10 @ E+=F_40_59(B,C,D) add r7,r7,r11,ror#2 ldr r9,[r14,#15*4] @@ -292,15 +353,15 @@ sha1_block_data_order: add r6,r8,r6,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r4,r5 @ F_xx_xx mov r9,r9,ror#31 add r6,r6,r7,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r3,r10,ror#2 @ F_xx_xx and r11,r4,r5 @ F_xx_xx add r6,r6,r9 @ E+=X[i] - str r9,[r14,#-4]! add r6,r6,r10 @ E+=F_40_59(B,C,D) add r6,r6,r11,ror#2 ldr r9,[r14,#15*4] @@ -309,15 +370,15 @@ sha1_block_data_order: add r5,r8,r5,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r3,r4 @ F_xx_xx mov r9,r9,ror#31 add r5,r5,r6,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r7,r10,ror#2 @ F_xx_xx and r11,r3,r4 @ F_xx_xx add r5,r5,r9 @ E+=X[i] - str r9,[r14,#-4]! add r5,r5,r10 @ E+=F_40_59(B,C,D) add r5,r5,r11,ror#2 ldr r9,[r14,#15*4] @@ -326,15 +387,15 @@ sha1_block_data_order: add r4,r8,r4,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r7,r3 @ F_xx_xx mov r9,r9,ror#31 add r4,r4,r5,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r6,r10,ror#2 @ F_xx_xx and r11,r7,r3 @ F_xx_xx add r4,r4,r9 @ E+=X[i] - str r9,[r14,#-4]! add r4,r4,r10 @ E+=F_40_59(B,C,D) add r4,r4,r11,ror#2 ldr r9,[r14,#15*4] @@ -343,15 +404,15 @@ sha1_block_data_order: add r3,r8,r3,ror#2 @ E+=K_xx_xx ldr r12,[r14,#2*4] eor r9,r9,r10 - eor r11,r11,r12 + eor r11,r11,r12 @ 1 cycle stall eor r10,r6,r7 @ F_xx_xx mov r9,r9,ror#31 add r3,r3,r4,ror#27 @ E+=ROR(A,27) eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! and r10,r5,r10,ror#2 @ F_xx_xx and r11,r6,r7 @ F_xx_xx add r3,r3,r9 @ E+=X[i] - str r9,[r14,#-4]! add r3,r3,r10 @ E+=F_40_59(B,C,D) add r3,r3,r11,ror#2 teq r14,sp @@ -373,10 +434,14 @@ sha1_block_data_order: teq r1,r2 bne .Lloop @ [+18], total 1307 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 diff --git a/app/openssl/crypto/sha/asm/sha1-ia64.pl b/app/openssl/crypto/sha/asm/sha1-ia64.pl index 51c4f47e..02d35d16 100644 --- a/app/openssl/crypto/sha/asm/sha1-ia64.pl +++ b/app/openssl/crypto/sha/asm/sha1-ia64.pl @@ -15,7 +15,7 @@ # is >50% better than HP C and >2x better than gcc. $code=<<___; -.ident \"sha1-ia64.s, version 1.2\" +.ident \"sha1-ia64.s, version 1.3\" .ident \"IA-64 ISA artwork by Andy Polyakov \" .explicit @@ -26,14 +26,10 @@ if ($^O eq "hpux") { $ADDP="addp4"; for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } } else { $ADDP="add"; } -for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); - $big_endian=0 if (/\-DL_ENDIAN/); } -if (!defined($big_endian)) - { $big_endian=(unpack('L',pack('N',1))==1); } #$human=1; if ($human) { # useful for visual code auditing... - ($A,$B,$C,$D,$E,$T) = ("A","B","C","D","E","T"); + ($A,$B,$C,$D,$E) = ("A","B","C","D","E"); ($h0,$h1,$h2,$h3,$h4) = ("h0","h1","h2","h3","h4"); ($K_00_19, $K_20_39, $K_40_59, $K_60_79) = ( "K_00_19","K_20_39","K_40_59","K_60_79" ); @@ -41,47 +37,50 @@ if ($human) { # useful for visual code auditing... "X8", "X9","X10","X11","X12","X13","X14","X15" ); } else { - ($A,$B,$C,$D,$E,$T) = ("loc0","loc1","loc2","loc3","loc4","loc5"); - ($h0,$h1,$h2,$h3,$h4) = ("loc6","loc7","loc8","loc9","loc10"); + ($A,$B,$C,$D,$E) = ("loc0","loc1","loc2","loc3","loc4"); + ($h0,$h1,$h2,$h3,$h4) = ("loc5","loc6","loc7","loc8","loc9"); ($K_00_19, $K_20_39, $K_40_59, $K_60_79) = - ( "r14", "r15", "loc11", "loc12" ); + ( "r14", "r15", "loc10", "loc11" ); @X= ( "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" ); } sub BODY_00_15 { local *code=shift; -local ($i,$a,$b,$c,$d,$e,$f)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +my $Xn=@X[$j%16]; $code.=<<___ if ($i==0); -{ .mmi; ld1 $X[$i&0xf]=[inp],2 // MSB +{ .mmi; ld1 $X[$i]=[inp],2 // MSB ld1 tmp2=[tmp3],2 };; { .mmi; ld1 tmp0=[inp],2 ld1 tmp4=[tmp3],2 // LSB - dep $X[$i&0xf]=$X[$i&0xf],tmp2,8,8 };; + dep $X[$i]=$X[$i],tmp2,8,8 };; ___ if ($i<15) { $code.=<<___; -{ .mmi; ld1 $X[($i+1)&0xf]=[inp],2 // +1 +{ .mmi; ld1 $Xn=[inp],2 // forward Xload + nop.m 0x0 dep tmp1=tmp0,tmp4,8,8 };; -{ .mmi; ld1 tmp2=[tmp3],2 // +1 +{ .mmi; ld1 tmp2=[tmp3],2 // forward Xload and tmp4=$c,$b - dep $X[$i&0xf]=$X[$i&0xf],tmp1,16,16 } //;; -{ .mmi; andcm tmp1=$d,$b - add tmp0=$e,$K_00_19 + dep $X[$i]=$X[$i],tmp1,16,16} //;; +{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19 + andcm tmp1=$d,$b dep.z tmp5=$a,5,27 };; // a<<5 -{ .mmi; or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - add $f=tmp0,$X[$i&0xf] // f=xi+e+K_00_19 +{ .mmi; add $e=$e,$X[$i] // e+=Xload + or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) extr.u tmp1=$a,27,5 };; // a>>27 -{ .mmi; ld1 tmp0=[inp],2 // +1 - add $f=$f,tmp4 // f+=F_00_19(b,c,d) +{ .mmi; ld1 tmp0=[inp],2 // forward Xload + add $e=$e,tmp4 // e+=F_00_19(b,c,d) shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; ld1 tmp4=[tmp3],2 // +1 +{ .mmi; ld1 tmp4=[tmp3],2 // forward Xload or tmp5=tmp1,tmp5 // ROTATE(a,5) mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $f=$f,tmp5 // f+=ROTATE(a,5) - dep $X[($i+1)&0xf]=$X[($i+1)&0xf],tmp2,8,8 // +1 - mux2 $X[$i&0xf]=$X[$i&0xf],0x44 } //;; +{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5) + dep $Xn=$Xn,tmp2,8,8 // forward Xload + mux2 $X[$i]=$X[$i],0x44 } //;; ___ } @@ -89,24 +88,24 @@ else { $code.=<<___; { .mii; and tmp3=$c,$b dep tmp1=tmp0,tmp4,8,8;; - dep $X[$i&0xf]=$X[$i&0xf],tmp1,16,16 } //;; -{ .mmi; andcm tmp1=$d,$b - add tmp0=$e,$K_00_19 + dep $X[$i]=$X[$i],tmp1,16,16} //;; +{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19 + andcm tmp1=$d,$b dep.z tmp5=$a,5,27 };; // a<<5 -{ .mmi; or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - add $f=tmp0,$X[$i&0xf] // f=xi+e+K_00_19 +{ .mmi; add $e=$e,$X[$i] // e+=Xupdate + or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1 - xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1 +{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate + xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate nop.i 0 };; -{ .mmi; add $f=$f,tmp4 // f+=F_00_19(b,c,d) - xor tmp2=tmp2,tmp3 // +1 +{ .mmi; add $e=$e,tmp4 // e+=F_00_19(b,c,d) + xor $Xn=$Xn,tmp3 // forward Xupdate shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) { .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $f=$f,tmp1 // f+=ROTATE(a,5) - shrp $e=tmp2,tmp2,31 // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1) - mux2 $X[$i&0xf]=$X[$i&0xf],0x44 };; +{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) + shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) + mux2 $X[$i]=$X[$i],0x44 };; ___ } @@ -114,27 +113,28 @@ ___ sub BODY_16_19 { local *code=shift; -local ($i,$a,$b,$c,$d,$e,$f)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +my $Xn=@X[$j%16]; $code.=<<___; -{ .mmi; mov $X[$i&0xf]=$f // Xupdate - and tmp0=$c,$b +{ .mib; add $e=$e,$K_00_19 // e+=K_00_19 dep.z tmp5=$a,5,27 } // a<<5 -{ .mmi; andcm tmp1=$d,$b - add tmp4=$e,$K_00_19 };; -{ .mmi; or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - add $f=$f,tmp4 // f+=e+K_00_19 +{ .mib; andcm tmp1=$d,$b + and tmp0=$c,$b };; +{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate + or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1 - xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1 +{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate + xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate nop.i 0 };; -{ .mmi; add $f=$f,tmp0 // f+=F_00_19(b,c,d) - xor tmp2=tmp2,tmp3 // +1 +{ .mmi; add $e=$e,tmp0 // f+=F_00_19(b,c,d) + xor $Xn=$Xn,tmp3 // forward Xupdate shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) { .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $f=$f,tmp1 // f+=ROTATE(a,5) - shrp $e=tmp2,tmp2,31 // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1) +{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) + shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) nop.i 0 };; ___ @@ -142,49 +142,47 @@ ___ sub BODY_20_39 { local *code=shift; -local ($i,$a,$b,$c,$d,$e,$f,$Konst)=@_; +my ($i,$a,$b,$c,$d,$e,$Konst)=@_; $Konst = $K_20_39 if (!defined($Konst)); +my $j=$i+1; +my $Xn=@X[$j%16]; if ($i<79) { $code.=<<___; -{ .mib; mov $X[$i&0xf]=$f // Xupdate +{ .mib; add $e=$e,$Konst // e+=K_XX_XX dep.z tmp5=$a,5,27 } // a<<5 { .mib; xor tmp0=$c,$b - add tmp4=$e,$Konst };; -{ .mmi; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d - add $f=$f,tmp4 // f+=e+K_20_39 + xor $Xn=$Xn,$X[($j+2)%16] };; // forward Xupdate +{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1 - xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1 - nop.i 0 };; -{ .mmi; add $f=$f,tmp0 // f+=F_20_39(b,c,d) - xor tmp2=tmp2,tmp3 // +1 +{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d + xor $Xn=$Xn,$X[($j+8)%16] };; // forward Xupdate +{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d) + xor $Xn=$Xn,$X[($j+13)%16] // forward Xupdate shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) { .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $f=$f,tmp1 // f+=ROTATE(a,5) - shrp $e=tmp2,tmp2,31 // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1) +{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) + shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) nop.i 0 };; ___ } else { $code.=<<___; -{ .mib; mov $X[$i&0xf]=$f // Xupdate +{ .mib; add $e=$e,$Konst // e+=K_60_79 dep.z tmp5=$a,5,27 } // a<<5 { .mib; xor tmp0=$c,$b - add tmp4=$e,$Konst };; -{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d - extr.u tmp1=$a,27,5 } // a>>27 -{ .mib; add $f=$f,tmp4 // f+=e+K_20_39 add $h1=$h1,$a };; // wrap up -{ .mmi; add $f=$f,tmp0 // f+=F_20_39(b,c,d) - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) ;;? -{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) +{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate + extr.u tmp1=$a,27,5 } // a>>27 +{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d add $h3=$h3,$c };; // wrap up -{ .mib; add tmp3=1,inp // used in unaligned codepath - add $f=$f,tmp1 } // f+=ROTATE(a,5) -{ .mib; add $h2=$h2,$b // wrap up +{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d) + or tmp1=tmp1,tmp5 // ROTATE(a,5) + shrp $b=tmp6,tmp6,2 };; // b=ROTATE(b,30) ;;? +{ .mmi; add $e=$e,tmp1 // e+=ROTATE(a,5) + add tmp3=1,inp // used in unaligned codepath add $h4=$h4,$d };; // wrap up ___ @@ -193,29 +191,29 @@ ___ sub BODY_40_59 { local *code=shift; -local ($i,$a,$b,$c,$d,$e,$f)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +my $Xn=@X[$j%16]; $code.=<<___; -{ .mmi; mov $X[$i&0xf]=$f // Xupdate - and tmp0=$c,$b +{ .mib; add $e=$e,$K_40_59 // e+=K_40_59 dep.z tmp5=$a,5,27 } // a<<5 -{ .mmi; and tmp1=$d,$b - add tmp4=$e,$K_40_59 };; -{ .mmi; or tmp0=tmp0,tmp1 // (b&c)|(b&d) - add $f=$f,tmp4 // f+=e+K_40_59 +{ .mib; and tmp1=$c,$d + xor tmp0=$c,$d };; +{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate + add tmp5=tmp5,tmp1 // a<<5+(c&d) extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; and tmp4=$c,$d - xor tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf] // +1 - xor tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1 - };; -{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) - xor tmp2=tmp2,tmp3 // +1 +{ .mmi; and tmp0=tmp0,$b + xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate + xor tmp3=$X[($j+8)%16],$X[($j+13)%16] };; // forward Xupdate +{ .mmi; add $e=$e,tmp0 // e+=b&(c^d) + add tmp5=tmp5,tmp1 // ROTATE(a,5)+(c&d) shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; or tmp0=tmp0,tmp4 // F_40_59(b,c,d)=(b&c)|(b&d)|(c&d) +{ .mmi; xor $Xn=$Xn,tmp3 mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $f=$f,tmp0 // f+=F_40_59(b,c,d) - shrp $e=tmp2,tmp2,31;; // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1) - add $f=$f,tmp1 };; // f+=ROTATE(a,5) +{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)+(c&d) + shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) + nop.i 0x0 };; ___ } @@ -237,7 +235,7 @@ inp=r33; // in1 .align 32 sha1_block_data_order: .prologue -{ .mmi; alloc tmp1=ar.pfs,3,15,0,0 +{ .mmi; alloc tmp1=ar.pfs,3,14,0,0 $ADDP tmp0=4,ctx .save ar.lc,r3 mov r3=ar.lc } @@ -245,8 +243,8 @@ sha1_block_data_order: $ADDP inp=0,inp mov r2=pr };; tmp4=in2; -tmp5=loc13; -tmp6=loc14; +tmp5=loc12; +tmp6=loc13; .body { .mlx; ld4 $h0=[ctx],8 movl $K_00_19=0x5a827999 } @@ -273,7 +271,8 @@ tmp6=loc14; ___ -{ my $i,@V=($A,$B,$C,$D,$E,$T); +{ my $i; + my @V=($A,$B,$C,$D,$E); for($i=0;$i<16;$i++) { &BODY_00_15(\$code,$i,@V); unshift(@V,pop(@V)); } for(;$i<20;$i++) { &BODY_16_19(\$code,$i,@V); unshift(@V,pop(@V)); } @@ -281,12 +280,12 @@ ___ for(;$i<60;$i++) { &BODY_40_59(\$code,$i,@V); unshift(@V,pop(@V)); } for(;$i<80;$i++) { &BODY_60_79(\$code,$i,@V); unshift(@V,pop(@V)); } - (($V[5] eq $D) and ($V[0] eq $E)) or die; # double-check + (($V[0] eq $A) and ($V[4] eq $E)) or die; # double-check } $code.=<<___; -{ .mmb; add $h0=$h0,$E - nop.m 0 +{ .mmb; add $h0=$h0,$A + add $h2=$h2,$C br.ctop.dptk.many .Ldtop };; .Ldend: { .mmi; add tmp0=4,ctx diff --git a/app/openssl/crypto/sha/asm/sha1-mips.S b/app/openssl/crypto/sha/asm/sha1-mips.S new file mode 100644 index 00000000..865da255 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-mips.S @@ -0,0 +1,1664 @@ +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +.text + +.set noat +.set noreorder +.align 5 +.globl sha1_block_data_order +.ent sha1_block_data_order +sha1_block_data_order: + .frame $29,16*4,$31 + .mask 3237937152,-4 + .set noreorder + sub $29,16*4 + sw $31,(16-1)*4($29) + sw $30,(16-2)*4($29) + sw $23,(16-3)*4($29) + sw $22,(16-4)*4($29) + sw $21,(16-5)*4($29) + sw $20,(16-6)*4($29) + sw $19,(16-7)*4($29) + sw $18,(16-8)*4($29) + sw $17,(16-9)*4($29) + sw $16,(16-10)*4($29) + sll $6,6 + add $6,$5 + sw $6,0($29) + lw $1,0($4) + lw $2,4($4) + lw $3,8($4) + lw $7,12($4) + b .Loop + lw $24,16($4) +.align 4 +.Loop: + .set reorder + lwl $8,3($5) + lui $31,0x5a82 + lwr $8,0($5) + ori $31,0x7999 # K_00_19 + srl $25,$8,24 # byte swap(0) + srl $6,$8,8 + andi $30,$8,0xFF00 + sll $8,$8,24 + andi $6,0xFF00 + sll $30,$30,8 + or $8,$25 + or $6,$30 + or $8,$6 + lwl $9,1*4+3($5) + sll $25,$1,5 # 0 + addu $24,$31 + lwr $9,1*4+0($5) + srl $6,$1,27 + addu $24,$25 + xor $25,$3,$7 + addu $24,$6 + sll $30,$2,30 + and $25,$2 + srl $2,$2,2 + xor $25,$7 + addu $24,$8 + or $2,$30 + addu $24,$25 + srl $25,$9,24 # byte swap(1) + srl $6,$9,8 + andi $30,$9,0xFF00 + sll $9,$9,24 + andi $6,0xFF00 + sll $30,$30,8 + or $9,$25 + or $6,$30 + or $9,$6 + lwl $10,2*4+3($5) + sll $25,$24,5 # 1 + addu $7,$31 + lwr $10,2*4+0($5) + srl $6,$24,27 + addu $7,$25 + xor $25,$2,$3 + addu $7,$6 + sll $30,$1,30 + and $25,$1 + srl $1,$1,2 + xor $25,$3 + addu $7,$9 + or $1,$30 + addu $7,$25 + srl $25,$10,24 # byte swap(2) + srl $6,$10,8 + andi $30,$10,0xFF00 + sll $10,$10,24 + andi $6,0xFF00 + sll $30,$30,8 + or $10,$25 + or $6,$30 + or $10,$6 + lwl $11,3*4+3($5) + sll $25,$7,5 # 2 + addu $3,$31 + lwr $11,3*4+0($5) + srl $6,$7,27 + addu $3,$25 + xor $25,$1,$2 + addu $3,$6 + sll $30,$24,30 + and $25,$24 + srl $24,$24,2 + xor $25,$2 + addu $3,$10 + or $24,$30 + addu $3,$25 + srl $25,$11,24 # byte swap(3) + srl $6,$11,8 + andi $30,$11,0xFF00 + sll $11,$11,24 + andi $6,0xFF00 + sll $30,$30,8 + or $11,$25 + or $6,$30 + or $11,$6 + lwl $12,4*4+3($5) + sll $25,$3,5 # 3 + addu $2,$31 + lwr $12,4*4+0($5) + srl $6,$3,27 + addu $2,$25 + xor $25,$24,$1 + addu $2,$6 + sll $30,$7,30 + and $25,$7 + srl $7,$7,2 + xor $25,$1 + addu $2,$11 + or $7,$30 + addu $2,$25 + srl $25,$12,24 # byte swap(4) + srl $6,$12,8 + andi $30,$12,0xFF00 + sll $12,$12,24 + andi $6,0xFF00 + sll $30,$30,8 + or $12,$25 + or $6,$30 + or $12,$6 + lwl $13,5*4+3($5) + sll $25,$2,5 # 4 + addu $1,$31 + lwr $13,5*4+0($5) + srl $6,$2,27 + addu $1,$25 + xor $25,$7,$24 + addu $1,$6 + sll $30,$3,30 + and $25,$3 + srl $3,$3,2 + xor $25,$24 + addu $1,$12 + or $3,$30 + addu $1,$25 + srl $25,$13,24 # byte swap(5) + srl $6,$13,8 + andi $30,$13,0xFF00 + sll $13,$13,24 + andi $6,0xFF00 + sll $30,$30,8 + or $13,$25 + or $6,$30 + or $13,$6 + lwl $14,6*4+3($5) + sll $25,$1,5 # 5 + addu $24,$31 + lwr $14,6*4+0($5) + srl $6,$1,27 + addu $24,$25 + xor $25,$3,$7 + addu $24,$6 + sll $30,$2,30 + and $25,$2 + srl $2,$2,2 + xor $25,$7 + addu $24,$13 + or $2,$30 + addu $24,$25 + srl $25,$14,24 # byte swap(6) + srl $6,$14,8 + andi $30,$14,0xFF00 + sll $14,$14,24 + andi $6,0xFF00 + sll $30,$30,8 + or $14,$25 + or $6,$30 + or $14,$6 + lwl $15,7*4+3($5) + sll $25,$24,5 # 6 + addu $7,$31 + lwr $15,7*4+0($5) + srl $6,$24,27 + addu $7,$25 + xor $25,$2,$3 + addu $7,$6 + sll $30,$1,30 + and $25,$1 + srl $1,$1,2 + xor $25,$3 + addu $7,$14 + or $1,$30 + addu $7,$25 + srl $25,$15,24 # byte swap(7) + srl $6,$15,8 + andi $30,$15,0xFF00 + sll $15,$15,24 + andi $6,0xFF00 + sll $30,$30,8 + or $15,$25 + or $6,$30 + or $15,$6 + lwl $16,8*4+3($5) + sll $25,$7,5 # 7 + addu $3,$31 + lwr $16,8*4+0($5) + srl $6,$7,27 + addu $3,$25 + xor $25,$1,$2 + addu $3,$6 + sll $30,$24,30 + and $25,$24 + srl $24,$24,2 + xor $25,$2 + addu $3,$15 + or $24,$30 + addu $3,$25 + srl $25,$16,24 # byte swap(8) + srl $6,$16,8 + andi $30,$16,0xFF00 + sll $16,$16,24 + andi $6,0xFF00 + sll $30,$30,8 + or $16,$25 + or $6,$30 + or $16,$6 + lwl $17,9*4+3($5) + sll $25,$3,5 # 8 + addu $2,$31 + lwr $17,9*4+0($5) + srl $6,$3,27 + addu $2,$25 + xor $25,$24,$1 + addu $2,$6 + sll $30,$7,30 + and $25,$7 + srl $7,$7,2 + xor $25,$1 + addu $2,$16 + or $7,$30 + addu $2,$25 + srl $25,$17,24 # byte swap(9) + srl $6,$17,8 + andi $30,$17,0xFF00 + sll $17,$17,24 + andi $6,0xFF00 + sll $30,$30,8 + or $17,$25 + or $6,$30 + or $17,$6 + lwl $18,10*4+3($5) + sll $25,$2,5 # 9 + addu $1,$31 + lwr $18,10*4+0($5) + srl $6,$2,27 + addu $1,$25 + xor $25,$7,$24 + addu $1,$6 + sll $30,$3,30 + and $25,$3 + srl $3,$3,2 + xor $25,$24 + addu $1,$17 + or $3,$30 + addu $1,$25 + srl $25,$18,24 # byte swap(10) + srl $6,$18,8 + andi $30,$18,0xFF00 + sll $18,$18,24 + andi $6,0xFF00 + sll $30,$30,8 + or $18,$25 + or $6,$30 + or $18,$6 + lwl $19,11*4+3($5) + sll $25,$1,5 # 10 + addu $24,$31 + lwr $19,11*4+0($5) + srl $6,$1,27 + addu $24,$25 + xor $25,$3,$7 + addu $24,$6 + sll $30,$2,30 + and $25,$2 + srl $2,$2,2 + xor $25,$7 + addu $24,$18 + or $2,$30 + addu $24,$25 + srl $25,$19,24 # byte swap(11) + srl $6,$19,8 + andi $30,$19,0xFF00 + sll $19,$19,24 + andi $6,0xFF00 + sll $30,$30,8 + or $19,$25 + or $6,$30 + or $19,$6 + lwl $20,12*4+3($5) + sll $25,$24,5 # 11 + addu $7,$31 + lwr $20,12*4+0($5) + srl $6,$24,27 + addu $7,$25 + xor $25,$2,$3 + addu $7,$6 + sll $30,$1,30 + and $25,$1 + srl $1,$1,2 + xor $25,$3 + addu $7,$19 + or $1,$30 + addu $7,$25 + srl $25,$20,24 # byte swap(12) + srl $6,$20,8 + andi $30,$20,0xFF00 + sll $20,$20,24 + andi $6,0xFF00 + sll $30,$30,8 + or $20,$25 + or $6,$30 + or $20,$6 + lwl $21,13*4+3($5) + sll $25,$7,5 # 12 + addu $3,$31 + lwr $21,13*4+0($5) + srl $6,$7,27 + addu $3,$25 + xor $25,$1,$2 + addu $3,$6 + sll $30,$24,30 + and $25,$24 + srl $24,$24,2 + xor $25,$2 + addu $3,$20 + or $24,$30 + addu $3,$25 + srl $25,$21,24 # byte swap(13) + srl $6,$21,8 + andi $30,$21,0xFF00 + sll $21,$21,24 + andi $6,0xFF00 + sll $30,$30,8 + or $21,$25 + or $6,$30 + or $21,$6 + lwl $22,14*4+3($5) + sll $25,$3,5 # 13 + addu $2,$31 + lwr $22,14*4+0($5) + srl $6,$3,27 + addu $2,$25 + xor $25,$24,$1 + addu $2,$6 + sll $30,$7,30 + and $25,$7 + srl $7,$7,2 + xor $25,$1 + addu $2,$21 + or $7,$30 + addu $2,$25 + srl $25,$22,24 # byte swap(14) + srl $6,$22,8 + andi $30,$22,0xFF00 + sll $22,$22,24 + andi $6,0xFF00 + sll $30,$30,8 + or $22,$25 + or $6,$30 + or $22,$6 + lwl $23,15*4+3($5) + sll $25,$2,5 # 14 + addu $1,$31 + lwr $23,15*4+0($5) + srl $6,$2,27 + addu $1,$25 + xor $25,$7,$24 + addu $1,$6 + sll $30,$3,30 + and $25,$3 + srl $3,$3,2 + xor $25,$24 + addu $1,$22 + or $3,$30 + addu $1,$25 + srl $25,$23,24 # byte swap(15) + srl $6,$23,8 + andi $30,$23,0xFF00 + sll $23,$23,24 + andi $6,0xFF00 + sll $30,$30,8 + or $23,$25 + or $23,$6 + or $23,$30 + xor $8,$10 + sll $25,$1,5 # 15 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $8,$16 + xor $25,$3,$7 + addu $24,$6 + xor $8,$21 + sll $30,$2,30 + and $25,$2 + srl $6,$8,31 + addu $8,$8 + srl $2,$2,2 + xor $25,$7 + or $8,$6 + addu $24,$23 + or $2,$30 + addu $24,$25 + xor $9,$11 + sll $25,$24,5 # 16 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $9,$17 + xor $25,$2,$3 + addu $7,$6 + xor $9,$22 + sll $30,$1,30 + and $25,$1 + srl $6,$9,31 + addu $9,$9 + srl $1,$1,2 + xor $25,$3 + or $9,$6 + addu $7,$8 + or $1,$30 + addu $7,$25 + xor $10,$12 + sll $25,$7,5 # 17 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $10,$18 + xor $25,$1,$2 + addu $3,$6 + xor $10,$23 + sll $30,$24,30 + and $25,$24 + srl $6,$10,31 + addu $10,$10 + srl $24,$24,2 + xor $25,$2 + or $10,$6 + addu $3,$9 + or $24,$30 + addu $3,$25 + xor $11,$13 + sll $25,$3,5 # 18 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $11,$19 + xor $25,$24,$1 + addu $2,$6 + xor $11,$8 + sll $30,$7,30 + and $25,$7 + srl $6,$11,31 + addu $11,$11 + srl $7,$7,2 + xor $25,$1 + or $11,$6 + addu $2,$10 + or $7,$30 + addu $2,$25 + xor $12,$14 + sll $25,$2,5 # 19 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $12,$20 + xor $25,$7,$24 + addu $1,$6 + xor $12,$9 + sll $30,$3,30 + and $25,$3 + srl $6,$12,31 + addu $12,$12 + srl $3,$3,2 + xor $25,$24 + or $12,$6 + addu $1,$11 + or $3,$30 + addu $1,$25 + lui $31,0x6ed9 + ori $31,0xeba1 # K_20_39 + xor $13,$15 + sll $25,$1,5 # 20 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $13,$21 + xor $25,$3,$7 + addu $24,$6 + xor $13,$10 + sll $30,$2,30 + xor $25,$2 + srl $6,$13,31 + addu $13,$13 + srl $2,$2,2 + addu $24,$12 + or $13,$6 + or $2,$30 + addu $24,$25 + xor $14,$16 + sll $25,$24,5 # 21 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $14,$22 + xor $25,$2,$3 + addu $7,$6 + xor $14,$11 + sll $30,$1,30 + xor $25,$1 + srl $6,$14,31 + addu $14,$14 + srl $1,$1,2 + addu $7,$13 + or $14,$6 + or $1,$30 + addu $7,$25 + xor $15,$17 + sll $25,$7,5 # 22 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $15,$23 + xor $25,$1,$2 + addu $3,$6 + xor $15,$12 + sll $30,$24,30 + xor $25,$24 + srl $6,$15,31 + addu $15,$15 + srl $24,$24,2 + addu $3,$14 + or $15,$6 + or $24,$30 + addu $3,$25 + xor $16,$18 + sll $25,$3,5 # 23 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $16,$8 + xor $25,$24,$1 + addu $2,$6 + xor $16,$13 + sll $30,$7,30 + xor $25,$7 + srl $6,$16,31 + addu $16,$16 + srl $7,$7,2 + addu $2,$15 + or $16,$6 + or $7,$30 + addu $2,$25 + xor $17,$19 + sll $25,$2,5 # 24 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $17,$9 + xor $25,$7,$24 + addu $1,$6 + xor $17,$14 + sll $30,$3,30 + xor $25,$3 + srl $6,$17,31 + addu $17,$17 + srl $3,$3,2 + addu $1,$16 + or $17,$6 + or $3,$30 + addu $1,$25 + xor $18,$20 + sll $25,$1,5 # 25 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $18,$10 + xor $25,$3,$7 + addu $24,$6 + xor $18,$15 + sll $30,$2,30 + xor $25,$2 + srl $6,$18,31 + addu $18,$18 + srl $2,$2,2 + addu $24,$17 + or $18,$6 + or $2,$30 + addu $24,$25 + xor $19,$21 + sll $25,$24,5 # 26 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $19,$11 + xor $25,$2,$3 + addu $7,$6 + xor $19,$16 + sll $30,$1,30 + xor $25,$1 + srl $6,$19,31 + addu $19,$19 + srl $1,$1,2 + addu $7,$18 + or $19,$6 + or $1,$30 + addu $7,$25 + xor $20,$22 + sll $25,$7,5 # 27 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $20,$12 + xor $25,$1,$2 + addu $3,$6 + xor $20,$17 + sll $30,$24,30 + xor $25,$24 + srl $6,$20,31 + addu $20,$20 + srl $24,$24,2 + addu $3,$19 + or $20,$6 + or $24,$30 + addu $3,$25 + xor $21,$23 + sll $25,$3,5 # 28 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $21,$13 + xor $25,$24,$1 + addu $2,$6 + xor $21,$18 + sll $30,$7,30 + xor $25,$7 + srl $6,$21,31 + addu $21,$21 + srl $7,$7,2 + addu $2,$20 + or $21,$6 + or $7,$30 + addu $2,$25 + xor $22,$8 + sll $25,$2,5 # 29 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $22,$14 + xor $25,$7,$24 + addu $1,$6 + xor $22,$19 + sll $30,$3,30 + xor $25,$3 + srl $6,$22,31 + addu $22,$22 + srl $3,$3,2 + addu $1,$21 + or $22,$6 + or $3,$30 + addu $1,$25 + xor $23,$9 + sll $25,$1,5 # 30 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $23,$15 + xor $25,$3,$7 + addu $24,$6 + xor $23,$20 + sll $30,$2,30 + xor $25,$2 + srl $6,$23,31 + addu $23,$23 + srl $2,$2,2 + addu $24,$22 + or $23,$6 + or $2,$30 + addu $24,$25 + xor $8,$10 + sll $25,$24,5 # 31 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $8,$16 + xor $25,$2,$3 + addu $7,$6 + xor $8,$21 + sll $30,$1,30 + xor $25,$1 + srl $6,$8,31 + addu $8,$8 + srl $1,$1,2 + addu $7,$23 + or $8,$6 + or $1,$30 + addu $7,$25 + xor $9,$11 + sll $25,$7,5 # 32 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $9,$17 + xor $25,$1,$2 + addu $3,$6 + xor $9,$22 + sll $30,$24,30 + xor $25,$24 + srl $6,$9,31 + addu $9,$9 + srl $24,$24,2 + addu $3,$8 + or $9,$6 + or $24,$30 + addu $3,$25 + xor $10,$12 + sll $25,$3,5 # 33 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $10,$18 + xor $25,$24,$1 + addu $2,$6 + xor $10,$23 + sll $30,$7,30 + xor $25,$7 + srl $6,$10,31 + addu $10,$10 + srl $7,$7,2 + addu $2,$9 + or $10,$6 + or $7,$30 + addu $2,$25 + xor $11,$13 + sll $25,$2,5 # 34 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $11,$19 + xor $25,$7,$24 + addu $1,$6 + xor $11,$8 + sll $30,$3,30 + xor $25,$3 + srl $6,$11,31 + addu $11,$11 + srl $3,$3,2 + addu $1,$10 + or $11,$6 + or $3,$30 + addu $1,$25 + xor $12,$14 + sll $25,$1,5 # 35 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $12,$20 + xor $25,$3,$7 + addu $24,$6 + xor $12,$9 + sll $30,$2,30 + xor $25,$2 + srl $6,$12,31 + addu $12,$12 + srl $2,$2,2 + addu $24,$11 + or $12,$6 + or $2,$30 + addu $24,$25 + xor $13,$15 + sll $25,$24,5 # 36 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $13,$21 + xor $25,$2,$3 + addu $7,$6 + xor $13,$10 + sll $30,$1,30 + xor $25,$1 + srl $6,$13,31 + addu $13,$13 + srl $1,$1,2 + addu $7,$12 + or $13,$6 + or $1,$30 + addu $7,$25 + xor $14,$16 + sll $25,$7,5 # 37 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $14,$22 + xor $25,$1,$2 + addu $3,$6 + xor $14,$11 + sll $30,$24,30 + xor $25,$24 + srl $6,$14,31 + addu $14,$14 + srl $24,$24,2 + addu $3,$13 + or $14,$6 + or $24,$30 + addu $3,$25 + xor $15,$17 + sll $25,$3,5 # 38 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $15,$23 + xor $25,$24,$1 + addu $2,$6 + xor $15,$12 + sll $30,$7,30 + xor $25,$7 + srl $6,$15,31 + addu $15,$15 + srl $7,$7,2 + addu $2,$14 + or $15,$6 + or $7,$30 + addu $2,$25 + xor $16,$18 + sll $25,$2,5 # 39 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $16,$8 + xor $25,$7,$24 + addu $1,$6 + xor $16,$13 + sll $30,$3,30 + xor $25,$3 + srl $6,$16,31 + addu $16,$16 + srl $3,$3,2 + addu $1,$15 + or $16,$6 + or $3,$30 + addu $1,$25 + lui $31,0x8f1b + ori $31,0xbcdc # K_40_59 + xor $17,$19 + sll $25,$1,5 # 40 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $17,$9 + and $25,$3,$7 + addu $24,$6 + xor $17,$14 + sll $30,$2,30 + addu $24,$25 + srl $6,$17,31 + xor $25,$3,$7 + addu $17,$17 + and $25,$2 + srl $2,$2,2 + or $17,$6 + addu $24,$16 + or $2,$30 + addu $24,$25 + xor $18,$20 + sll $25,$24,5 # 41 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $18,$10 + and $25,$2,$3 + addu $7,$6 + xor $18,$15 + sll $30,$1,30 + addu $7,$25 + srl $6,$18,31 + xor $25,$2,$3 + addu $18,$18 + and $25,$1 + srl $1,$1,2 + or $18,$6 + addu $7,$17 + or $1,$30 + addu $7,$25 + xor $19,$21 + sll $25,$7,5 # 42 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $19,$11 + and $25,$1,$2 + addu $3,$6 + xor $19,$16 + sll $30,$24,30 + addu $3,$25 + srl $6,$19,31 + xor $25,$1,$2 + addu $19,$19 + and $25,$24 + srl $24,$24,2 + or $19,$6 + addu $3,$18 + or $24,$30 + addu $3,$25 + xor $20,$22 + sll $25,$3,5 # 43 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $20,$12 + and $25,$24,$1 + addu $2,$6 + xor $20,$17 + sll $30,$7,30 + addu $2,$25 + srl $6,$20,31 + xor $25,$24,$1 + addu $20,$20 + and $25,$7 + srl $7,$7,2 + or $20,$6 + addu $2,$19 + or $7,$30 + addu $2,$25 + xor $21,$23 + sll $25,$2,5 # 44 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $21,$13 + and $25,$7,$24 + addu $1,$6 + xor $21,$18 + sll $30,$3,30 + addu $1,$25 + srl $6,$21,31 + xor $25,$7,$24 + addu $21,$21 + and $25,$3 + srl $3,$3,2 + or $21,$6 + addu $1,$20 + or $3,$30 + addu $1,$25 + xor $22,$8 + sll $25,$1,5 # 45 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $22,$14 + and $25,$3,$7 + addu $24,$6 + xor $22,$19 + sll $30,$2,30 + addu $24,$25 + srl $6,$22,31 + xor $25,$3,$7 + addu $22,$22 + and $25,$2 + srl $2,$2,2 + or $22,$6 + addu $24,$21 + or $2,$30 + addu $24,$25 + xor $23,$9 + sll $25,$24,5 # 46 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $23,$15 + and $25,$2,$3 + addu $7,$6 + xor $23,$20 + sll $30,$1,30 + addu $7,$25 + srl $6,$23,31 + xor $25,$2,$3 + addu $23,$23 + and $25,$1 + srl $1,$1,2 + or $23,$6 + addu $7,$22 + or $1,$30 + addu $7,$25 + xor $8,$10 + sll $25,$7,5 # 47 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $8,$16 + and $25,$1,$2 + addu $3,$6 + xor $8,$21 + sll $30,$24,30 + addu $3,$25 + srl $6,$8,31 + xor $25,$1,$2 + addu $8,$8 + and $25,$24 + srl $24,$24,2 + or $8,$6 + addu $3,$23 + or $24,$30 + addu $3,$25 + xor $9,$11 + sll $25,$3,5 # 48 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $9,$17 + and $25,$24,$1 + addu $2,$6 + xor $9,$22 + sll $30,$7,30 + addu $2,$25 + srl $6,$9,31 + xor $25,$24,$1 + addu $9,$9 + and $25,$7 + srl $7,$7,2 + or $9,$6 + addu $2,$8 + or $7,$30 + addu $2,$25 + xor $10,$12 + sll $25,$2,5 # 49 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $10,$18 + and $25,$7,$24 + addu $1,$6 + xor $10,$23 + sll $30,$3,30 + addu $1,$25 + srl $6,$10,31 + xor $25,$7,$24 + addu $10,$10 + and $25,$3 + srl $3,$3,2 + or $10,$6 + addu $1,$9 + or $3,$30 + addu $1,$25 + xor $11,$13 + sll $25,$1,5 # 50 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $11,$19 + and $25,$3,$7 + addu $24,$6 + xor $11,$8 + sll $30,$2,30 + addu $24,$25 + srl $6,$11,31 + xor $25,$3,$7 + addu $11,$11 + and $25,$2 + srl $2,$2,2 + or $11,$6 + addu $24,$10 + or $2,$30 + addu $24,$25 + xor $12,$14 + sll $25,$24,5 # 51 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $12,$20 + and $25,$2,$3 + addu $7,$6 + xor $12,$9 + sll $30,$1,30 + addu $7,$25 + srl $6,$12,31 + xor $25,$2,$3 + addu $12,$12 + and $25,$1 + srl $1,$1,2 + or $12,$6 + addu $7,$11 + or $1,$30 + addu $7,$25 + xor $13,$15 + sll $25,$7,5 # 52 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $13,$21 + and $25,$1,$2 + addu $3,$6 + xor $13,$10 + sll $30,$24,30 + addu $3,$25 + srl $6,$13,31 + xor $25,$1,$2 + addu $13,$13 + and $25,$24 + srl $24,$24,2 + or $13,$6 + addu $3,$12 + or $24,$30 + addu $3,$25 + xor $14,$16 + sll $25,$3,5 # 53 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $14,$22 + and $25,$24,$1 + addu $2,$6 + xor $14,$11 + sll $30,$7,30 + addu $2,$25 + srl $6,$14,31 + xor $25,$24,$1 + addu $14,$14 + and $25,$7 + srl $7,$7,2 + or $14,$6 + addu $2,$13 + or $7,$30 + addu $2,$25 + xor $15,$17 + sll $25,$2,5 # 54 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $15,$23 + and $25,$7,$24 + addu $1,$6 + xor $15,$12 + sll $30,$3,30 + addu $1,$25 + srl $6,$15,31 + xor $25,$7,$24 + addu $15,$15 + and $25,$3 + srl $3,$3,2 + or $15,$6 + addu $1,$14 + or $3,$30 + addu $1,$25 + xor $16,$18 + sll $25,$1,5 # 55 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $16,$8 + and $25,$3,$7 + addu $24,$6 + xor $16,$13 + sll $30,$2,30 + addu $24,$25 + srl $6,$16,31 + xor $25,$3,$7 + addu $16,$16 + and $25,$2 + srl $2,$2,2 + or $16,$6 + addu $24,$15 + or $2,$30 + addu $24,$25 + xor $17,$19 + sll $25,$24,5 # 56 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $17,$9 + and $25,$2,$3 + addu $7,$6 + xor $17,$14 + sll $30,$1,30 + addu $7,$25 + srl $6,$17,31 + xor $25,$2,$3 + addu $17,$17 + and $25,$1 + srl $1,$1,2 + or $17,$6 + addu $7,$16 + or $1,$30 + addu $7,$25 + xor $18,$20 + sll $25,$7,5 # 57 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $18,$10 + and $25,$1,$2 + addu $3,$6 + xor $18,$15 + sll $30,$24,30 + addu $3,$25 + srl $6,$18,31 + xor $25,$1,$2 + addu $18,$18 + and $25,$24 + srl $24,$24,2 + or $18,$6 + addu $3,$17 + or $24,$30 + addu $3,$25 + xor $19,$21 + sll $25,$3,5 # 58 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $19,$11 + and $25,$24,$1 + addu $2,$6 + xor $19,$16 + sll $30,$7,30 + addu $2,$25 + srl $6,$19,31 + xor $25,$24,$1 + addu $19,$19 + and $25,$7 + srl $7,$7,2 + or $19,$6 + addu $2,$18 + or $7,$30 + addu $2,$25 + xor $20,$22 + sll $25,$2,5 # 59 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $20,$12 + and $25,$7,$24 + addu $1,$6 + xor $20,$17 + sll $30,$3,30 + addu $1,$25 + srl $6,$20,31 + xor $25,$7,$24 + addu $20,$20 + and $25,$3 + srl $3,$3,2 + or $20,$6 + addu $1,$19 + or $3,$30 + addu $1,$25 + lui $31,0xca62 + ori $31,0xc1d6 # K_60_79 + xor $21,$23 + sll $25,$1,5 # 60 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $21,$13 + xor $25,$3,$7 + addu $24,$6 + xor $21,$18 + sll $30,$2,30 + xor $25,$2 + srl $6,$21,31 + addu $21,$21 + srl $2,$2,2 + addu $24,$20 + or $21,$6 + or $2,$30 + addu $24,$25 + xor $22,$8 + sll $25,$24,5 # 61 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $22,$14 + xor $25,$2,$3 + addu $7,$6 + xor $22,$19 + sll $30,$1,30 + xor $25,$1 + srl $6,$22,31 + addu $22,$22 + srl $1,$1,2 + addu $7,$21 + or $22,$6 + or $1,$30 + addu $7,$25 + xor $23,$9 + sll $25,$7,5 # 62 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $23,$15 + xor $25,$1,$2 + addu $3,$6 + xor $23,$20 + sll $30,$24,30 + xor $25,$24 + srl $6,$23,31 + addu $23,$23 + srl $24,$24,2 + addu $3,$22 + or $23,$6 + or $24,$30 + addu $3,$25 + xor $8,$10 + sll $25,$3,5 # 63 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $8,$16 + xor $25,$24,$1 + addu $2,$6 + xor $8,$21 + sll $30,$7,30 + xor $25,$7 + srl $6,$8,31 + addu $8,$8 + srl $7,$7,2 + addu $2,$23 + or $8,$6 + or $7,$30 + addu $2,$25 + xor $9,$11 + sll $25,$2,5 # 64 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $9,$17 + xor $25,$7,$24 + addu $1,$6 + xor $9,$22 + sll $30,$3,30 + xor $25,$3 + srl $6,$9,31 + addu $9,$9 + srl $3,$3,2 + addu $1,$8 + or $9,$6 + or $3,$30 + addu $1,$25 + xor $10,$12 + sll $25,$1,5 # 65 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $10,$18 + xor $25,$3,$7 + addu $24,$6 + xor $10,$23 + sll $30,$2,30 + xor $25,$2 + srl $6,$10,31 + addu $10,$10 + srl $2,$2,2 + addu $24,$9 + or $10,$6 + or $2,$30 + addu $24,$25 + xor $11,$13 + sll $25,$24,5 # 66 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $11,$19 + xor $25,$2,$3 + addu $7,$6 + xor $11,$8 + sll $30,$1,30 + xor $25,$1 + srl $6,$11,31 + addu $11,$11 + srl $1,$1,2 + addu $7,$10 + or $11,$6 + or $1,$30 + addu $7,$25 + xor $12,$14 + sll $25,$7,5 # 67 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $12,$20 + xor $25,$1,$2 + addu $3,$6 + xor $12,$9 + sll $30,$24,30 + xor $25,$24 + srl $6,$12,31 + addu $12,$12 + srl $24,$24,2 + addu $3,$11 + or $12,$6 + or $24,$30 + addu $3,$25 + xor $13,$15 + sll $25,$3,5 # 68 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $13,$21 + xor $25,$24,$1 + addu $2,$6 + xor $13,$10 + sll $30,$7,30 + xor $25,$7 + srl $6,$13,31 + addu $13,$13 + srl $7,$7,2 + addu $2,$12 + or $13,$6 + or $7,$30 + addu $2,$25 + xor $14,$16 + sll $25,$2,5 # 69 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $14,$22 + xor $25,$7,$24 + addu $1,$6 + xor $14,$11 + sll $30,$3,30 + xor $25,$3 + srl $6,$14,31 + addu $14,$14 + srl $3,$3,2 + addu $1,$13 + or $14,$6 + or $3,$30 + addu $1,$25 + xor $15,$17 + sll $25,$1,5 # 70 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $15,$23 + xor $25,$3,$7 + addu $24,$6 + xor $15,$12 + sll $30,$2,30 + xor $25,$2 + srl $6,$15,31 + addu $15,$15 + srl $2,$2,2 + addu $24,$14 + or $15,$6 + or $2,$30 + addu $24,$25 + xor $16,$18 + sll $25,$24,5 # 71 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $16,$8 + xor $25,$2,$3 + addu $7,$6 + xor $16,$13 + sll $30,$1,30 + xor $25,$1 + srl $6,$16,31 + addu $16,$16 + srl $1,$1,2 + addu $7,$15 + or $16,$6 + or $1,$30 + addu $7,$25 + xor $17,$19 + sll $25,$7,5 # 72 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $17,$9 + xor $25,$1,$2 + addu $3,$6 + xor $17,$14 + sll $30,$24,30 + xor $25,$24 + srl $6,$17,31 + addu $17,$17 + srl $24,$24,2 + addu $3,$16 + or $17,$6 + or $24,$30 + addu $3,$25 + xor $18,$20 + sll $25,$3,5 # 73 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $18,$10 + xor $25,$24,$1 + addu $2,$6 + xor $18,$15 + sll $30,$7,30 + xor $25,$7 + srl $6,$18,31 + addu $18,$18 + srl $7,$7,2 + addu $2,$17 + or $18,$6 + or $7,$30 + addu $2,$25 + xor $19,$21 + sll $25,$2,5 # 74 + addu $1,$31 + srl $6,$2,27 + addu $1,$25 + xor $19,$11 + xor $25,$7,$24 + addu $1,$6 + xor $19,$16 + sll $30,$3,30 + xor $25,$3 + srl $6,$19,31 + addu $19,$19 + srl $3,$3,2 + addu $1,$18 + or $19,$6 + or $3,$30 + addu $1,$25 + xor $20,$22 + sll $25,$1,5 # 75 + addu $24,$31 + srl $6,$1,27 + addu $24,$25 + xor $20,$12 + xor $25,$3,$7 + addu $24,$6 + xor $20,$17 + sll $30,$2,30 + xor $25,$2 + srl $6,$20,31 + addu $20,$20 + srl $2,$2,2 + addu $24,$19 + or $20,$6 + or $2,$30 + addu $24,$25 + xor $21,$23 + sll $25,$24,5 # 76 + addu $7,$31 + srl $6,$24,27 + addu $7,$25 + xor $21,$13 + xor $25,$2,$3 + addu $7,$6 + xor $21,$18 + sll $30,$1,30 + xor $25,$1 + srl $6,$21,31 + addu $21,$21 + srl $1,$1,2 + addu $7,$20 + or $21,$6 + or $1,$30 + addu $7,$25 + xor $22,$8 + sll $25,$7,5 # 77 + addu $3,$31 + srl $6,$7,27 + addu $3,$25 + xor $22,$14 + xor $25,$1,$2 + addu $3,$6 + xor $22,$19 + sll $30,$24,30 + xor $25,$24 + srl $6,$22,31 + addu $22,$22 + srl $24,$24,2 + addu $3,$21 + or $22,$6 + or $24,$30 + addu $3,$25 + xor $23,$9 + sll $25,$3,5 # 78 + addu $2,$31 + srl $6,$3,27 + addu $2,$25 + xor $23,$15 + xor $25,$24,$1 + addu $2,$6 + xor $23,$20 + sll $30,$7,30 + xor $25,$7 + srl $6,$23,31 + addu $23,$23 + srl $7,$7,2 + addu $2,$22 + or $23,$6 + or $7,$30 + addu $2,$25 + lw $8,0($4) + sll $25,$2,5 # 79 + addu $1,$31 + lw $9,4($4) + srl $6,$2,27 + addu $1,$25 + lw $10,8($4) + xor $25,$7,$24 + addu $1,$6 + lw $11,12($4) + sll $30,$3,30 + xor $25,$3 + lw $12,16($4) + srl $3,$3,2 + addu $1,$23 + or $3,$30 + addu $1,$25 + add $5,64 + lw $6,0($29) + + addu $1,$8 + addu $2,$9 + sw $1,0($4) + addu $3,$10 + addu $7,$11 + sw $2,4($4) + addu $24,$12 + sw $3,8($4) + sw $7,12($4) + sw $24,16($4) + .set noreorder + bne $5,$6,.Loop + nop + + .set noreorder + lw $31,(16-1)*4($29) + lw $30,(16-2)*4($29) + lw $23,(16-3)*4($29) + lw $22,(16-4)*4($29) + lw $21,(16-5)*4($29) + lw $20,(16-6)*4($29) + lw $19,(16-7)*4($29) + lw $18,(16-8)*4($29) + lw $17,(16-9)*4($29) + lw $16,(16-10)*4($29) + jr $31 + add $29,16*4 +.end sha1_block_data_order +.rdata +.asciiz "SHA1 for MIPS, CRYPTOGAMS by " diff --git a/app/openssl/crypto/sha/asm/sha1-mips.pl b/app/openssl/crypto/sha/asm/sha1-mips.pl new file mode 100644 index 00000000..f1a702f3 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-mips.pl @@ -0,0 +1,354 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA1 block procedure for MIPS. + +# Performance improvement is 30% on unaligned input. The "secret" is +# to deploy lwl/lwr pair to load unaligned input. One could have +# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32- +# compatible subroutine. There is room for minor optimization on +# little-endian platforms... + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $PTR_SLL="dsll"; # incidentally works even on n32 + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $PTR_SLL="sll"; + $SZREG=4; +} +# +# +# +###################################################################### + +$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; + +for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +if (!defined($big_endian)) + { $big_endian=(unpack('L',pack('N',1))==1); } + +# offsets of the Most and Least Significant Bytes +$MSB=$big_endian?0:3; +$LSB=3&~$MSB; + +@X=map("\$$_",(8..23)); # a4-a7,s0-s11 + +$ctx=$a0; +$inp=$a1; +$num=$a2; +$A="\$1"; +$B="\$2"; +$C="\$3"; +$D="\$7"; +$E="\$24"; @V=($A,$B,$C,$D,$E); +$t0="\$25"; +$t1=$num; # $num is offloaded to stack +$t2="\$30"; # fp +$K="\$31"; # ra + +sub BODY_00_14 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if (!$big_endian); + srl $t0,@X[$i],24 # byte swap($i) + srl $t1,@X[$i],8 + andi $t2,@X[$i],0xFF00 + sll @X[$i],@X[$i],24 + andi $t1,0xFF00 + sll $t2,$t2,8 + or @X[$i],$t0 + or $t1,$t2 + or @X[$i],$t1 +___ +$code.=<<___; + lwl @X[$j],$j*4+$MSB($inp) + sll $t0,$a,5 # $i + addu $e,$K + lwr @X[$j],$j*4+$LSB($inp) + srl $t1,$a,27 + addu $e,$t0 + xor $t0,$c,$d + addu $e,$t1 + sll $t2,$b,30 + and $t0,$b + srl $b,$b,2 + xor $t0,$d + addu $e,@X[$i] + or $b,$t2 + addu $e,$t0 +___ +} + +sub BODY_15_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; + +$code.=<<___ if (!$big_endian && $i==15); + srl $t0,@X[$i],24 # byte swap($i) + srl $t1,@X[$i],8 + andi $t2,@X[$i],0xFF00 + sll @X[$i],@X[$i],24 + andi $t1,0xFF00 + sll $t2,$t2,8 + or @X[$i],$t0 + or @X[$i],$t1 + or @X[$i],$t2 +___ +$code.=<<___; + xor @X[$j%16],@X[($j+2)%16] + sll $t0,$a,5 # $i + addu $e,$K + srl $t1,$a,27 + addu $e,$t0 + xor @X[$j%16],@X[($j+8)%16] + xor $t0,$c,$d + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + sll $t2,$b,30 + and $t0,$b + srl $t1,@X[$j%16],31 + addu @X[$j%16],@X[$j%16] + srl $b,$b,2 + xor $t0,$d + or @X[$j%16],$t1 + addu $e,@X[$i%16] + or $b,$t2 + addu $e,$t0 +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); + xor @X[$j%16],@X[($j+2)%16] + sll $t0,$a,5 # $i + addu $e,$K + srl $t1,$a,27 + addu $e,$t0 + xor @X[$j%16],@X[($j+8)%16] + xor $t0,$c,$d + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + sll $t2,$b,30 + xor $t0,$b + srl $t1,@X[$j%16],31 + addu @X[$j%16],@X[$j%16] + srl $b,$b,2 + addu $e,@X[$i%16] + or @X[$j%16],$t1 + or $b,$t2 + addu $e,$t0 +___ +$code.=<<___ if ($i==79); + lw @X[0],0($ctx) + sll $t0,$a,5 # $i + addu $e,$K + lw @X[1],4($ctx) + srl $t1,$a,27 + addu $e,$t0 + lw @X[2],8($ctx) + xor $t0,$c,$d + addu $e,$t1 + lw @X[3],12($ctx) + sll $t2,$b,30 + xor $t0,$b + lw @X[4],16($ctx) + srl $b,$b,2 + addu $e,@X[$i%16] + or $b,$t2 + addu $e,$t0 +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); + xor @X[$j%16],@X[($j+2)%16] + sll $t0,$a,5 # $i + addu $e,$K + srl $t1,$a,27 + addu $e,$t0 + xor @X[$j%16],@X[($j+8)%16] + and $t0,$c,$d + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + sll $t2,$b,30 + addu $e,$t0 + srl $t1,@X[$j%16],31 + xor $t0,$c,$d + addu @X[$j%16],@X[$j%16] + and $t0,$b + srl $b,$b,2 + or @X[$j%16],$t1 + addu $e,@X[$i%16] + or $b,$t2 + addu $e,$t0 +___ +} + +$FRAMESIZE=16; # large enough to accomodate NUBI saved registers +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; + +$code=<<___; +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +.text + +.set noat +.set noreorder +.align 5 +.globl sha1_block_data_order +.ent sha1_block_data_order +sha1_block_data_order: + .frame $sp,$FRAMESIZE*$SZREG,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder + $PTR_SUB $sp,$FRAMESIZE*$SZREG + $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp) + $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp) + $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp) + $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp) + $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp) + $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp) + $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp) + $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp) + $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp) + $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp) + $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp) + $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp) + $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp) + $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp) +___ +$code.=<<___; + $PTR_SLL $num,6 + $PTR_ADD $num,$inp + $REG_S $num,0($sp) + lw $A,0($ctx) + lw $B,4($ctx) + lw $C,8($ctx) + lw $D,12($ctx) + b .Loop + lw $E,16($ctx) +.align 4 +.Loop: + .set reorder + lwl @X[0],$MSB($inp) + lui $K,0x5a82 + lwr @X[0],$LSB($inp) + ori $K,0x7999 # K_00_19 +___ +for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); } +for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + lui $K,0x6ed9 + ori $K,0xeba1 # K_20_39 +___ +for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + lui $K,0x8f1b + ori $K,0xbcdc # K_40_59 +___ +for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + lui $K,0xca62 + ori $K,0xc1d6 # K_60_79 +___ +for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + $PTR_ADD $inp,64 + $REG_L $num,0($sp) + + addu $A,$X[0] + addu $B,$X[1] + sw $A,0($ctx) + addu $C,$X[2] + addu $D,$X[3] + sw $B,4($ctx) + addu $E,$X[4] + sw $C,8($ctx) + sw $D,12($ctx) + sw $E,16($ctx) + .set noreorder + bne $inp,$num,.Loop + nop + + .set noreorder + $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp) + $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp) + $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp) + $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp) + $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp) + $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp) + $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp) + $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp) + $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp) + $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp) + $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp) + $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp) + $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp) + $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE*$SZREG +.end sha1_block_data_order +.rdata +.asciiz "SHA1 for MIPS, CRYPTOGAMS by " +___ +print $code; +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha1-parisc.pl b/app/openssl/crypto/sha/asm/sha1-parisc.pl new file mode 100644 index 00000000..6e5a328a --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-parisc.pl @@ -0,0 +1,260 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA1 block procedure for PA-RISC. + +# June 2009. +# +# On PA-7100LC performance is >30% better than gcc 3.2 generated code +# for aligned input and >50% better for unaligned. Compared to vendor +# compiler on PA-8600 it's almost 60% faster in 64-bit build and just +# few percent faster in 32-bit one (this for aligned input, data for +# unaligned input is not available). +# +# Special thanks to polarhome.com for providing HP-UX account. + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; +} else { + $LEVEL ="1.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; +} + +$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker + # [+ argument transfer] +$ctx="%r26"; # arg0 +$inp="%r25"; # arg1 +$num="%r24"; # arg2 + +$t0="%r28"; +$t1="%r29"; +$K="%r31"; + +@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", + "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0); + +@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23"); + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<15); + addl $K,$e,$e ; $i + shd $a,$a,27,$t1 + addl @X[$i],$e,$e + and $c,$b,$t0 + addl $t1,$e,$e + andcm $d,$b,$t1 + shd $b,$b,2,$b + or $t1,$t0,$t0 + addl $t0,$e,$e +___ +$code.=<<___ if ($i>=15); # with forward Xupdate + addl $K,$e,$e ; $i + shd $a,$a,27,$t1 + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + addl @X[$i%16],$e,$e + and $c,$b,$t0 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + addl $t1,$e,$e + andcm $d,$b,$t1 + shd $b,$b,2,$b + or $t1,$t0,$t0 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + add $t0,$e,$e + shd @X[$j%16],@X[$j%16],31,@X[$j%16] +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i + addl $K,$e,$e + shd $a,$a,27,$t1 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + addl @X[$i%16],$e,$e + xor $b,$c,$t0 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + addl $t1,$e,$e + shd $b,$b,2,$b + xor $d,$t0,$t0 + shd @X[$j%16],@X[$j%16],31,@X[$j%16] + addl $t0,$e,$e +___ +$code.=<<___ if ($i==79); # with context load + ldw 0($ctx),@X[0] ; $i + addl $K,$e,$e + shd $a,$a,27,$t1 + ldw 4($ctx),@X[1] + addl @X[$i%16],$e,$e + xor $b,$c,$t0 + ldw 8($ctx),@X[2] + addl $t1,$e,$e + shd $b,$b,2,$b + xor $d,$t0,$t0 + ldw 12($ctx),@X[3] + addl $t0,$e,$e + ldw 16($ctx),@X[4] +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___; + shd $a,$a,27,$t1 ; $i + addl $K,$e,$e + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + xor $d,$c,$t0 + addl @X[$i%16],$e,$e + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + and $b,$t0,$t0 + addl $t1,$e,$e + shd $b,$b,2,$b + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + addl $t0,$e,$e + and $d,$c,$t1 + shd @X[$j%16],@X[$j%16],31,@X[$j%16] + addl $t1,$e,$e +___ +} + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR +sha1_block_data_order + .PROC + .CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) + $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) + $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) + $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) + $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) + $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) + + ldw 0($ctx),$A + ldw 4($ctx),$B + ldw 8($ctx),$C + ldw 12($ctx),$D + ldw 16($ctx),$E + + extru $inp,31,2,$t0 ; t0=inp&3; + sh3addl $t0,%r0,$t0 ; t0*=8; + subi 32,$t0,$t0 ; t0=32-t0; + mtctl $t0,%cr11 ; %sar=t0; + +L\$oop + ldi 3,$t0 + andcm $inp,$t0,$t0 ; 64-bit neutral +___ + for ($i=0;$i<15;$i++) { # load input block + $code.="\tldw `4*$i`($t0),@X[$i]\n"; } +$code.=<<___; + cmpb,*= $inp,$t0,L\$aligned + ldw 60($t0),@X[15] + ldw 64($t0),@X[16] +___ + for ($i=0;$i<16;$i++) { # align input + $code.="\tvshd @X[$i],@X[$i+1],@X[$i]\n"; } +$code.=<<___; +L\$aligned + ldil L'0x5a827000,$K ; K_00_19 + ldo 0x999($K),$K +___ +for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0x6ed9e000,$K ; K_20_39 + ldo 0xba1($K),$K +___ + +for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0x8f1bb000,$K ; K_40_59 + ldo 0xcdc($K),$K +___ + +for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0xca62c000,$K ; K_60_79 + ldo 0x1d6($K),$K +___ +for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + addl @X[0],$A,$A + addl @X[1],$B,$B + addl @X[2],$C,$C + addl @X[3],$D,$D + addl @X[4],$E,$E + stw $A,0($ctx) + stw $B,4($ctx) + stw $C,8($ctx) + stw $D,12($ctx) + stw $E,16($ctx) + addib,*<> -1,$num,L\$oop + ldo 64($inp),$inp + + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 + $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 + $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 + $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 + $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 + $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + .STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by " +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/,\*/,/gm if ($SIZE_T==4); +$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); +print $code; +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha1-ppc.pl b/app/openssl/crypto/sha/asm/sha1-ppc.pl index dcd0fcdf..2140dd2f 100755 --- a/app/openssl/crypto/sha/asm/sha1-ppc.pl +++ b/app/openssl/crypto/sha/asm/sha1-ppc.pl @@ -24,12 +24,14 @@ $flavour = shift; if ($flavour =~ /64/) { $SIZE_T =8; + $LRSAVE =2*$SIZE_T; $UCMP ="cmpld"; $STU ="stdu"; $POP ="ld"; $PUSH ="std"; } elsif ($flavour =~ /32/) { $SIZE_T =4; + $LRSAVE =$SIZE_T; $UCMP ="cmplw"; $STU ="stwu"; $POP ="lwz"; @@ -43,7 +45,8 @@ die "can't locate ppc-xlate.pl"; open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; -$FRAME=24*$SIZE_T; +$FRAME=24*$SIZE_T+64; +$LOCALS=6*$SIZE_T; $K ="r0"; $sp ="r1"; @@ -162,9 +165,8 @@ $code=<<___; .globl .sha1_block_data_order .align 4 .sha1_block_data_order: + $STU $sp,-$FRAME($sp) mflr r0 - $STU $sp,`-($FRAME+64)`($sp) - $PUSH r0,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r17,`$FRAME-$SIZE_T*15`($sp) @@ -182,6 +184,7 @@ $code=<<___; $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) lwz $A,0($ctx) lwz $B,4($ctx) lwz $C,8($ctx) @@ -192,37 +195,14 @@ $code=<<___; Laligned: mtctr $num bl Lsha1_block_private -Ldone: - $POP r0,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,`$FRAME+64` - blr -___ + b Ldone -# PowerPC specification allows an implementation to be ill-behaved -# upon unaligned access which crosses page boundary. "Better safe -# than sorry" principle makes me treat it specially. But I don't -# look for particular offending word, but rather for 64-byte input -# block which crosses the boundary. Once found that block is aligned -# and hashed separately... -$code.=<<___; +; PowerPC specification allows an implementation to be ill-behaved +; upon unaligned access which crosses page boundary. "Better safe +; than sorry" principle makes me treat it specially. But I don't +; look for particular offending word, but rather for 64-byte input +; block which crosses the boundary. Once found that block is aligned +; and hashed separately... .align 4 Lunaligned: subfic $t1,$inp,4096 @@ -237,7 +217,7 @@ Lunaligned: Lcross_page: li $t1,16 mtctr $t1 - addi r20,$sp,$FRAME ; spot below the frame + addi r20,$sp,$LOCALS ; spot within the frame Lmemcpy: lbz r16,0($inp) lbz r17,1($inp) @@ -251,15 +231,40 @@ Lmemcpy: addi r20,r20,4 bdnz Lmemcpy - $PUSH $inp,`$FRAME-$SIZE_T*19`($sp) + $PUSH $inp,`$FRAME-$SIZE_T*18`($sp) li $t1,1 - addi $inp,$sp,$FRAME + addi $inp,$sp,$LOCALS mtctr $t1 bl Lsha1_block_private - $POP $inp,`$FRAME-$SIZE_T*19`($sp) + $POP $inp,`$FRAME-$SIZE_T*18`($sp) addic. $num,$num,-1 bne- Lunaligned - b Ldone + +Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) + $POP r17,`$FRAME-$SIZE_T*15`($sp) + $POP r18,`$FRAME-$SIZE_T*14`($sp) + $POP r19,`$FRAME-$SIZE_T*13`($sp) + $POP r20,`$FRAME-$SIZE_T*12`($sp) + $POP r21,`$FRAME-$SIZE_T*11`($sp) + $POP r22,`$FRAME-$SIZE_T*10`($sp) + $POP r23,`$FRAME-$SIZE_T*9`($sp) + $POP r24,`$FRAME-$SIZE_T*8`($sp) + $POP r25,`$FRAME-$SIZE_T*7`($sp) + $POP r26,`$FRAME-$SIZE_T*6`($sp) + $POP r27,`$FRAME-$SIZE_T*5`($sp) + $POP r28,`$FRAME-$SIZE_T*4`($sp) + $POP r29,`$FRAME-$SIZE_T*3`($sp) + $POP r30,`$FRAME-$SIZE_T*2`($sp) + $POP r31,`$FRAME-$SIZE_T*1`($sp) + mtlr r0 + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 ___ # This is private block function, which uses tailored calling @@ -309,6 +314,8 @@ $code.=<<___; addi $inp,$inp,`16*4` bdnz- Lsha1_block_private blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ___ $code.=<<___; .asciz "SHA1 block transform for PPC, CRYPTOGAMS by " diff --git a/app/openssl/crypto/sha/asm/sha1-s390x.pl b/app/openssl/crypto/sha/asm/sha1-s390x.pl index 4b178482..9193dda4 100644 --- a/app/openssl/crypto/sha/asm/sha1-s390x.pl +++ b/app/openssl/crypto/sha/asm/sha1-s390x.pl @@ -21,9 +21,28 @@ # instructions to favour dual-issue z10 pipeline. On z10 hardware is # "only" ~2.3x faster than software. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. + $kimdfunc=1; # magic function code for kimd instruction -$output=shift; +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $K_00_39="%r0"; $K=$K_00_39; @@ -42,13 +61,14 @@ $t1="%r11"; @X=("%r12","%r13","%r14"); $sp="%r15"; -$frame=160+16*4; +$stdframe=16*$SIZE_T+4*8; +$frame=$stdframe+16*4; sub Xupdate { my $i=shift; $code.=<<___ if ($i==15); - lg $prefetch,160($sp) ### Xupdate(16) warm-up + lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up lr $X[0],$X[2] ___ return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle @@ -58,8 +78,8 @@ $code.=<<___ if ($i<16); ___ $code.=<<___ if ($i>=16); xgr $X[0],$prefetch ### Xupdate($i) - lg $prefetch,`160+4*(($i+2)%16)`($sp) - xg $X[0],`160+4*(($i+8)%16)`($sp) + lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) + xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) xgr $X[0],$prefetch rll $X[0],$X[0],1 rllg $X[1],$X[0],32 @@ -68,7 +88,7 @@ $code.=<<___ if ($i>=16); lr $X[2],$X[1] # feedback ___ $code.=<<___ if ($i<=70); - stg $X[0],`160+4*($i%16)`($sp) + stg $X[0],`$stdframe+4*($i%16)`($sp) ___ unshift(@X,pop(@X)); } @@ -148,9 +168,9 @@ $code.=<<___ if ($kimdfunc); tmhl %r0,0x4000 # check for message-security assist jz .Lsoftware lghi %r0,0 - la %r1,16($sp) + la %r1,`2*$SIZE_T`($sp) .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,16($sp) + lg %r0,`2*$SIZE_T`($sp) tmhh %r0,`0x8000>>$kimdfunc` jz .Lsoftware lghi %r0,$kimdfunc @@ -165,11 +185,11 @@ $code.=<<___ if ($kimdfunc); ___ $code.=<<___; lghi %r1,-$frame - stg $ctx,16($sp) - stmg %r6,%r15,48($sp) + st${g} $ctx,`2*$SIZE_T`($sp) + stm${g} %r6,%r15,`6*$SIZE_T`($sp) lgr %r0,$sp la $sp,0(%r1,$sp) - stg %r0,0($sp) + st${g} %r0,0($sp) larl $t0,Ktable llgf $A,0($ctx) @@ -199,7 +219,7 @@ ___ for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - lg $ctx,`$frame+16`($sp) + l${g} $ctx,`$frame+2*$SIZE_T`($sp) la $inp,64($inp) al $A,0($ctx) al $B,4($ctx) @@ -211,13 +231,13 @@ $code.=<<___; st $C,8($ctx) st $D,12($ctx) st $E,16($ctx) - brct $len,.Lloop + brct${g} $len,.Lloop - lmg %r6,%r15,`$frame+48`($sp) + lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) br %r14 .size sha1_block_data_order,.-sha1_block_data_order .string "SHA1 block transform for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,8,8 +.comm OPENSSL_s390xcap_P,16,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl b/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl index 85e8d680..e65291bb 100644 --- a/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl +++ b/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl @@ -549,7 +549,7 @@ ___ # programmer detect if current CPU is VIS capable at run-time. sub unvis { my ($mnemonic,$rs1,$rs2,$rd)=@_; -my $ref,$opf; +my ($ref,$opf); my %visopf = ( "fmul8ulx16" => 0x037, "faligndata" => 0x048, "fpadd32" => 0x052, diff --git a/app/openssl/crypto/sha/asm/sha1-x86_64.S b/app/openssl/crypto/sha/asm/sha1-x86_64.S new file mode 100644 index 00000000..3922e203 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha1-x86_64.S @@ -0,0 +1,2486 @@ +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/app/openssl/crypto/sha/asm/sha1-x86_64.pl b/app/openssl/crypto/sha/asm/sha1-x86_64.pl index 4edc5ea9..f15c7ec3 100755 --- a/app/openssl/crypto/sha/asm/sha1-x86_64.pl +++ b/app/openssl/crypto/sha/asm/sha1-x86_64.pl @@ -16,7 +16,7 @@ # There was suggestion to mechanically translate 32-bit code, but I # dismissed it, reasoning that x86_64 offers enough register bank # capacity to fully utilize SHA-1 parallelism. Therefore this fresh -# implementation:-) However! While 64-bit code does performs better +# implementation:-) However! While 64-bit code does perform better # on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, # x86_64 does offer larger *addressable* bank, but out-of-order core # reaches for even more registers through dynamic aliasing, and EM64T @@ -29,6 +29,38 @@ # Xeon P4 +65% +0% 9.9 # Core2 +60% +10% 7.0 +# August 2009. +# +# The code was revised to minimize code size and to maximize +# "distance" between instructions producing input to 'lea' +# instruction and the 'lea' instruction itself, which is essential +# for Intel Atom core. + +# October 2010. +# +# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it +# is to offload message schedule denoted by Wt in NIST specification, +# or Xupdate in OpenSSL source, to SIMD unit. See sha1-586.pl module +# for background and implementation details. The only difference from +# 32-bit code is that 64-bit code doesn't have to spill @X[] elements +# to free temporary registers. + +# April 2011. +# +# Add AVX code path. See sha1-586.pl for further information. + +###################################################################### +# Current performance is summarized in following table. Numbers are +# CPU clock cycles spent to process single byte (less is better). +# +# x86_64 SSSE3 AVX +# P4 9.8 - +# Opteron 6.6 - +# Core2 6.7 6.1/+10% - +# Atom 11.0 9.7/+13% - +# Westmere 7.1 5.6/+27% - +# Sandy Bridge 7.9 6.3/+25% 5.2/+51% + $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } @@ -40,7 +72,18 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/ && + $1>=2.19); +$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && + $1>=2.09); +$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./ && + $1>=10); + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $ctx="%rdi"; # 1st arg $inp="%rsi"; # 2nd arg @@ -51,196 +94,994 @@ $ctx="%r8"; $inp="%r9"; $num="%r10"; -$xi="%eax"; -$t0="%ebx"; -$t1="%ecx"; -$A="%edx"; -$B="%esi"; -$C="%edi"; -$D="%ebp"; -$E="%r11d"; -$T="%r12d"; - -@V=($A,$B,$C,$D,$E,$T); +$t0="%eax"; +$t1="%ebx"; +$t2="%ecx"; +@xi=("%edx","%ebp"); +$A="%esi"; +$B="%edi"; +$C="%r11d"; +$D="%r12d"; +$E="%r13d"; -sub PROLOGUE { -my $func=shift; -$code.=<<___; -.globl $func -.type $func,\@function,3 -.align 16 -$func: - push %rbx - push %rbp - push %r12 - mov %rsp,%r11 - mov %rdi,$ctx # reassigned argument - sub \$`8+16*4`,%rsp - mov %rsi,$inp # reassigned argument - and \$-64,%rsp - mov %rdx,$num # reassigned argument - mov %r11,`16*4`(%rsp) -.Lprologue: - - mov 0($ctx),$A - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov 16($ctx),$E -___ -} - -sub EPILOGUE { -my $func=shift; -$code.=<<___; - mov `16*4`(%rsp),%rsi - mov (%rsi),%r12 - mov 8(%rsi),%rbp - mov 16(%rsi),%rbx - lea 24(%rsi),%rsp -.Lepilogue: - ret -.size $func,.-$func -___ -} +@V=($A,$B,$C,$D,$E); sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e,$f,$host)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if ($i==0); - mov `4*$i`($inp),$xi - `"bswap $xi" if(!defined($host))` - mov $xi,`4*$i`(%rsp) + mov `4*$i`($inp),$xi[0] + bswap $xi[0] + mov $xi[0],`4*$i`(%rsp) ___ $code.=<<___ if ($i<15); - lea 0x5a827999($xi,$e),$f mov $c,$t0 - mov `4*$j`($inp),$xi - mov $a,$e + mov `4*$j`($inp),$xi[1] + mov $a,$t2 xor $d,$t0 - `"bswap $xi" if(!defined($host))` - rol \$5,$e + bswap $xi[1] + rol \$5,$t2 + lea 0x5a827999($xi[0],$e),$e and $b,$t0 - mov $xi,`4*$j`(%rsp) - add $e,$f + mov $xi[1],`4*$j`(%rsp) + add $t2,$e xor $d,$t0 rol \$30,$b - add $t0,$f + add $t0,$e ___ $code.=<<___ if ($i>=15); - lea 0x5a827999($xi,$e),$f - mov `4*($j%16)`(%rsp),$xi + mov `4*($j%16)`(%rsp),$xi[1] mov $c,$t0 - mov $a,$e - xor `4*(($j+2)%16)`(%rsp),$xi + mov $a,$t2 + xor `4*(($j+2)%16)`(%rsp),$xi[1] xor $d,$t0 - rol \$5,$e - xor `4*(($j+8)%16)`(%rsp),$xi + rol \$5,$t2 + xor `4*(($j+8)%16)`(%rsp),$xi[1] and $b,$t0 - add $e,$f - xor `4*(($j+13)%16)`(%rsp),$xi + lea 0x5a827999($xi[0],$e),$e + xor `4*(($j+13)%16)`(%rsp),$xi[1] xor $d,$t0 + rol \$1,$xi[1] + add $t2,$e rol \$30,$b - add $t0,$f - rol \$1,$xi - mov $xi,`4*($j%16)`(%rsp) + mov $xi[1],`4*($j%16)`(%rsp) + add $t0,$e ___ +unshift(@xi,pop(@xi)); } sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e,$f)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; my $K=($i<40)?0x6ed9eba1:0xca62c1d6; $code.=<<___ if ($i<79); - lea $K($xi,$e),$f - mov `4*($j%16)`(%rsp),$xi + mov `4*($j%16)`(%rsp),$xi[1] mov $c,$t0 - mov $a,$e - xor `4*(($j+2)%16)`(%rsp),$xi + mov $a,$t2 + xor `4*(($j+2)%16)`(%rsp),$xi[1] xor $b,$t0 - rol \$5,$e - xor `4*(($j+8)%16)`(%rsp),$xi + rol \$5,$t2 + lea $K($xi[0],$e),$e + xor `4*(($j+8)%16)`(%rsp),$xi[1] xor $d,$t0 - add $e,$f - xor `4*(($j+13)%16)`(%rsp),$xi + add $t2,$e + xor `4*(($j+13)%16)`(%rsp),$xi[1] rol \$30,$b - add $t0,$f - rol \$1,$xi + add $t0,$e + rol \$1,$xi[1] ___ $code.=<<___ if ($i<76); - mov $xi,`4*($j%16)`(%rsp) + mov $xi[1],`4*($j%16)`(%rsp) ___ $code.=<<___ if ($i==79); - lea $K($xi,$e),$f mov $c,$t0 - mov $a,$e + mov $a,$t2 xor $b,$t0 - rol \$5,$e + lea $K($xi[0],$e),$e + rol \$5,$t2 xor $d,$t0 - add $e,$f + add $t2,$e rol \$30,$b - add $t0,$f + add $t0,$e ___ +unshift(@xi,pop(@xi)); } sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e,$f)=@_; +my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___; - lea 0x8f1bbcdc($xi,$e),$f - mov `4*($j%16)`(%rsp),$xi - mov $b,$t0 - mov $b,$t1 - xor `4*(($j+2)%16)`(%rsp),$xi - mov $a,$e - and $c,$t0 - xor `4*(($j+8)%16)`(%rsp),$xi - or $c,$t1 - rol \$5,$e - xor `4*(($j+13)%16)`(%rsp),$xi - and $d,$t1 - add $e,$f - rol \$1,$xi - or $t1,$t0 + mov `4*($j%16)`(%rsp),$xi[1] + mov $c,$t0 + mov $c,$t1 + xor `4*(($j+2)%16)`(%rsp),$xi[1] + and $d,$t0 + mov $a,$t2 + xor `4*(($j+8)%16)`(%rsp),$xi[1] + xor $d,$t1 + lea 0x8f1bbcdc($xi[0],$e),$e + rol \$5,$t2 + xor `4*(($j+13)%16)`(%rsp),$xi[1] + add $t0,$e + and $b,$t1 + rol \$1,$xi[1] + add $t1,$e rol \$30,$b - mov $xi,`4*($j%16)`(%rsp) - add $t0,$f + mov $xi[1],`4*($j%16)`(%rsp) + add $t2,$e ___ +unshift(@xi,pop(@xi)); } -$code=".text\n"; +$code.=<<___; +.text +.extern OPENSSL_ia32cap_P -&PROLOGUE("sha1_block_data_order"); -$code.=".align 4\n.Lloop:\n"; +.globl sha1_block_data_order +.type sha1_block_data_order,\@function,3 +.align 16 +sha1_block_data_order: + mov OPENSSL_ia32cap_P+0(%rip),%r9d + mov OPENSSL_ia32cap_P+4(%rip),%r8d + test \$`1<<9`,%r8d # check SSSE3 bit + jz .Lialu +___ +$code.=<<___ if ($avx); + and \$`1<<28`,%r8d # mask AVX bit + and \$`1<<30`,%r9d # mask "Intel CPU" bit + or %r9d,%r8d + cmp \$`1<<28|1<<30`,%r8d + je _avx_shortcut +___ +$code.=<<___; + jmp _ssse3_shortcut + +.align 16 +.Lialu: + push %rbx + push %rbp + push %r12 + push %r13 + mov %rsp,%r11 + mov %rdi,$ctx # reassigned argument + sub \$`8+16*4`,%rsp + mov %rsi,$inp # reassigned argument + and \$-64,%rsp + mov %rdx,$num # reassigned argument + mov %r11,`16*4`(%rsp) +.Lprologue: + + mov 0($ctx),$A + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov 16($ctx),$E + jmp .Lloop + +.align 16 +.Lloop: +___ for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - add 0($ctx),$E - add 4($ctx),$T - add 8($ctx),$A - add 12($ctx),$B - add 16($ctx),$C - mov $E,0($ctx) - mov $T,4($ctx) - mov $A,8($ctx) - mov $B,12($ctx) - mov $C,16($ctx) - - xchg $E,$A # mov $E,$A - xchg $T,$B # mov $T,$B - xchg $E,$C # mov $A,$C - xchg $T,$D # mov $B,$D - # mov $C,$E - lea `16*4`($inp),$inp + add 0($ctx),$A + add 4($ctx),$B + add 8($ctx),$C + add 12($ctx),$D + add 16($ctx),$E + mov $A,0($ctx) + mov $B,4($ctx) + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + sub \$1,$num + lea `16*4`($inp),$inp jnz .Lloop + + mov `16*4`(%rsp),%rsi + mov (%rsi),%r13 + mov 8(%rsi),%r12 + mov 16(%rsi),%rbp + mov 24(%rsi),%rbx + lea 32(%rsi),%rsp +.Lepilogue: + ret +.size sha1_block_data_order,.-sha1_block_data_order ___ -&EPILOGUE("sha1_block_data_order"); +{{{ +my $Xi=4; +my @X=map("%xmm$_",(4..7,0..3)); +my @Tx=map("%xmm$_",(8..10)); +my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization +my @T=("%esi","%edi"); +my $j=0; +my $K_XX_XX="%r11"; + +my $_rol=sub { &rol(@_) }; +my $_ror=sub { &ror(@_) }; + +$code.=<<___; +.type sha1_block_data_order_ssse3,\@function,3 +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + push %rbx + push %rbp + push %r12 + lea `-64-($win64?5*16:0)`(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,64+0(%rsp) + movaps %xmm7,64+16(%rsp) + movaps %xmm8,64+32(%rsp) + movaps %xmm9,64+48(%rsp) + movaps %xmm10,64+64(%rsp) +.Lprologue_ssse3: +___ +$code.=<<___; + mov %rdi,$ctx # reassigned argument + mov %rsi,$inp # reassigned argument + mov %rdx,$num # reassigned argument + + shl \$6,$num + add $inp,$num + lea K_XX_XX(%rip),$K_XX_XX + + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + + movdqa 64($K_XX_XX),@X[2] # pbswap mask + movdqa 0($K_XX_XX),@Tx[1] # K_00_19 + movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + movdqu 16($inp),@X[-3&7] + movdqu 32($inp),@X[-2&7] + movdqu 48($inp),@X[-1&7] + pshufb @X[2],@X[-4&7] # byte swap + add \$64,$inp + pshufb @X[2],@X[-3&7] + pshufb @X[2],@X[-2&7] + pshufb @X[2],@X[-1&7] + paddd @Tx[1],@X[-4&7] # add K_00_19 + paddd @Tx[1],@X[-3&7] + paddd @Tx[1],@X[-2&7] + movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU + psubd @Tx[1],@X[-4&7] # restore X[] + movdqa @X[-3&7],16(%rsp) + psubd @Tx[1],@X[-3&7] + movdqa @X[-2&7],32(%rsp) + psubd @Tx[1],@X[-2&7] + jmp .Loop_ssse3 +___ + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + &movdqa (@X[0],@X[-3&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[0],@X[-1&7]); + &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &psrldq (@Tx[0],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (@Tx[2],@X[0]); + &movdqa (@Tx[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword + &paddd (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@Tx[0],31); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[1],@Tx[2]); + eval(shift(@insns)); + eval(shift(@insns)); + + &psrld (@Tx[2],30); + &por (@X[0],@Tx[0]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &pslld (@Tx[1],2); + &pxor (@X[0],@Tx[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xupdate_ssse3_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); + eval(shift(@insns)); # body_20_39 + &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + if ($Xi%5) { + &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... + } else { # ... or load next one + &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); + } + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &movdqa (@Tx[0],@X[0]); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &pslld (@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &psrld (@Tx[0],30); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &por (@X[0],@Tx[0]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &movdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xuplast_ssse3_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &paddd (@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &cmp ($inp,$num); + &je (".Ldone_ssse3"); + + unshift(@Tx,pop(@Tx)); + + &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask + &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 + &movdqu (@X[-4&7],"0($inp)"); # load input + &movdqu (@X[-3&7],"16($inp)"); + &movdqu (@X[-2&7],"32($inp)"); + &movdqu (@X[-1&7],"48($inp)"); + &pshufb (@X[-4&7],@X[2]); # byte swap + &add ($inp,64); + + $Xi=0; +} + +sub Xloop_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &pshufb (@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[($Xi-4)&7],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + &psubd (@X[($Xi-4)&7],@Tx[1]); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_ssse3() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +sub body_00_19 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer + '&xor ($c,$d);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&xor ($c,$d);', # restore $c + '&xor (@T[0],$d);', + '&add ($e,$a);', + '&$_ror ($b,$j?7:2);', # $b>>>2 + '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub body_20_39 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer + '&xor (@T[0],$d);', # ($b^$d) + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&xor (@T[0],$c);', # ($b^$d^$c) + '&add ($e,$a);', + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub body_40_59 () { + ( + '($a,$b,$c,$d,$e)=@V;'. + '&mov (@T[1],$c);', + '&xor ($c,$d);', + '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer + '&and (@T[1],$d);', + '&and (@T[0],$c);', # ($b&($c^$d)) + '&$_ror ($b,7);', # $b>>>2 + '&add ($e,@T[1]);', + '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', + '&add ($e,@T[0]);', + '&xor ($c,$d);', # restore $c + '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} $code.=<<___; -.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " .align 16 +.Loop_ssse3: +___ + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_16_31(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_00_19); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_40_59); + &Xupdate_ssse3_32_79(\&body_20_39); + &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + &Xloop_ssse3(\&body_20_39); + +$code.=<<___; + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic seed + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: +___ + $j=$saved_j; @V=@saved_V; + + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + &Xtail_ssse3(\&body_20_39); + +$code.=<<___; + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) +___ +$code.=<<___ if ($win64); + movaps 64+0(%rsp),%xmm6 + movaps 64+16(%rsp),%xmm7 + movaps 64+32(%rsp),%xmm8 + movaps 64+48(%rsp),%xmm9 + movaps 64+64(%rsp),%xmm10 +___ +$code.=<<___; + lea `64+($win64?5*16:0)`(%rsp),%rsi + mov 0(%rsi),%r12 + mov 8(%rsi),%rbp + mov 16(%rsi),%rbx + lea 24(%rsi),%rsp +.Lepilogue_ssse3: + ret +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +___ + +if ($avx) { +my $Xi=4; +my @X=map("%xmm$_",(4..7,0..3)); +my @Tx=map("%xmm$_",(8..10)); +my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization +my @T=("%esi","%edi"); +my $j=0; +my $K_XX_XX="%r11"; + +my $_rol=sub { &shld(@_[0],@_) }; +my $_ror=sub { &shrd(@_[0],@_) }; + +$code.=<<___; +.type sha1_block_data_order_avx,\@function,3 +.align 16 +sha1_block_data_order_avx: +_avx_shortcut: + push %rbx + push %rbp + push %r12 + lea `-64-($win64?5*16:0)`(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,64+0(%rsp) + movaps %xmm7,64+16(%rsp) + movaps %xmm8,64+32(%rsp) + movaps %xmm9,64+48(%rsp) + movaps %xmm10,64+64(%rsp) +.Lprologue_avx: +___ +$code.=<<___; + mov %rdi,$ctx # reassigned argument + mov %rsi,$inp # reassigned argument + mov %rdx,$num # reassigned argument + vzeroupper + + shl \$6,$num + add $inp,$num + lea K_XX_XX(%rip),$K_XX_XX + + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + + vmovdqa 64($K_XX_XX),@X[2] # pbswap mask + vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 + vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + vmovdqu 16($inp),@X[-3&7] + vmovdqu 32($inp),@X[-2&7] + vmovdqu 48($inp),@X[-1&7] + vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap + add \$64,$inp + vpshufb @X[2],@X[-3&7],@X[-3&7] + vpshufb @X[2],@X[-2&7],@X[-2&7] + vpshufb @X[2],@X[-1&7],@X[-1&7] + vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 + vpaddd @Tx[1],@X[-3&7],@X[1] + vpaddd @Tx[1],@X[-2&7],@X[2] + vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU + vmovdqa @X[1],16(%rsp) + vmovdqa @X[2],32(%rsp) + jmp .Loop_avx +___ + +sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 40 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[0],@X[0],31); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword + &vpaddd (@X[0],@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[1],@Tx[2],30); + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslld (@Tx[2],@Tx[2],2); + &vpxor (@X[0],@X[0],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xupdate_avx_32_79() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my ($a,$b,$c,$d,$e); + + &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + if ($Xi%5) { + &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... + } else { # ... or load next one + &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); + } + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]" + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + + &vpsrld (@Tx[0],@X[0],30); + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpslld (@X[0],@X[0],2); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # ror + eval(shift(@insns)); + + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)); + &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # rol + eval(shift(@insns)); + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; push(@X,shift(@X)); # "rotate" X[] + push(@Tx,shift(@Tx)); +} + +sub Xuplast_avx_80() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + + foreach (@insns) { eval; } # remaining instructions + + &cmp ($inp,$num); + &je (".Ldone_avx"); + + unshift(@Tx,pop(@Tx)); + + &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask + &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 + &vmovdqu(@X[-4&7],"0($inp)"); # load input + &vmovdqu(@X[-3&7],"16($inp)"); + &vmovdqu(@X[-2&7],"32($inp)"); + &vmovdqu(@X[-1&7],"48($inp)"); + &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap + &add ($inp,64); + + $Xi=0; +} + +sub Xloop_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + eval(shift(@insns)); + eval(shift(@insns)); + &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU + eval(shift(@insns)); + eval(shift(@insns)); + + foreach (@insns) { eval; } + $Xi++; +} + +sub Xtail_avx() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + +$code.=<<___; +.align 16 +.Loop_avx: +___ + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_16_31(\&body_00_19); + &Xupdate_avx_32_79(\&body_00_19); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_20_39); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_40_59); + &Xupdate_avx_32_79(\&body_20_39); + &Xuplast_avx_80(\&body_20_39); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + &Xloop_avx(\&body_20_39); + +$code.=<<___; + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic seed + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + jmp .Loop_avx + +.align 16 +.Ldone_avx: +___ + $j=$saved_j; @V=@saved_V; + + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + &Xtail_avx(\&body_20_39); + +$code.=<<___; + vzeroupper + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) +___ +$code.=<<___ if ($win64); + movaps 64+0(%rsp),%xmm6 + movaps 64+16(%rsp),%xmm7 + movaps 64+32(%rsp),%xmm8 + movaps 64+48(%rsp),%xmm9 + movaps 64+64(%rsp),%xmm10 +___ +$code.=<<___; + lea `64+($win64?5*16:0)`(%rsp),%rsi + mov 0(%rsi),%r12 + mov 8(%rsi),%rbp + mov 16(%rsi),%rbx + lea 24(%rsi),%rsp +.Lepilogue_avx: + ret +.size sha1_block_data_order_avx,.-sha1_block_data_order_avx +___ +} +$code.=<<___; +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask +___ +}}} +$code.=<<___; +.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " +.align 64 ___ # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, @@ -272,25 +1113,75 @@ se_handler: lea .Lprologue(%rip),%r10 cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue + jb .Lcommon_seh_tail mov 152($context),%rax # pull context->Rsp lea .Lepilogue(%rip),%r10 cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue + jae .Lcommon_seh_tail mov `16*4`(%rax),%rax # pull saved stack pointer - lea 24(%rax),%rax + lea 32(%rax),%rax mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 + mov -32(%rax),%r13 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + + jmp .Lcommon_seh_tail +.size se_handler,.-se_handler -.Lin_prologue: +.type ssse3_handler,\@abi-omnipotent +.align 16 +ssse3_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + lea 64(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$10,%ecx + .long 0xa548f3fc # cld; rep movsq + lea `24+64+5*16`(%rax),%rax # adjust stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore cotnext->R12 + +.Lcommon_seh_tail: mov 8(%rax),%rdi mov 16(%rax),%rsi mov %rax,152($context) # restore context->Rsp @@ -328,19 +1219,38 @@ se_handler: pop %rdi pop %rsi ret -.size se_handler,.-se_handler +.size ssse3_handler,.-ssse3_handler .section .pdata .align 4 .rva .LSEH_begin_sha1_block_data_order .rva .LSEH_end_sha1_block_data_order .rva .LSEH_info_sha1_block_data_order - + .rva .LSEH_begin_sha1_block_data_order_ssse3 + .rva .LSEH_end_sha1_block_data_order_ssse3 + .rva .LSEH_info_sha1_block_data_order_ssse3 +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_sha1_block_data_order_avx + .rva .LSEH_end_sha1_block_data_order_avx + .rva .LSEH_info_sha1_block_data_order_avx +___ +$code.=<<___; .section .xdata .align 8 .LSEH_info_sha1_block_data_order: .byte 9,0,0,0 .rva se_handler +.LSEH_info_sha1_block_data_order_ssse3: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_sha1_block_data_order_avx: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] ___ } diff --git a/app/openssl/crypto/sha/asm/sha256-586.S b/app/openssl/crypto/sha/asm/sha256-586.S new file mode 100644 index 00000000..77a89514 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha256-586.S @@ -0,0 +1,258 @@ +.file "sha512-586.s" +.text +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: +.L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +.align 16 +.L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $64,%edi + subl $32,%esp + movl %edi,100(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edi,28(%esp) +.align 16 +.L00300_15: + movl 92(%esp),%ebx + movl %edx,%ecx + rorl $14,%ecx + movl 20(%esp),%esi + xorl %edx,%ecx + rorl $5,%ecx + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx + xorl %edi,%esi + movl %edx,16(%esp) + movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi + addl %esi,%ebx + rorl $9,%ecx + addl 28(%esp),%ebx + xorl %eax,%ecx + rorl $11,%ecx + movl 4(%esp),%esi + xorl %eax,%ecx + rorl $2,%ecx + addl %ebx,%edx + movl 8(%esp),%edi + addl %ecx,%ebx + movl %eax,(%esp) + movl %eax,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax + movl (%ebp),%esi + orl %ecx,%eax + addl $4,%ebp + addl %ebx,%eax + addl %esi,%edx + addl %esi,%eax + cmpl $3248222580,%esi + jne .L00300_15 + movl 152(%esp),%ebx +.align 16 +.L00416_63: + movl %ebx,%esi + movl 100(%esp),%ecx + rorl $11,%esi + movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi + shrl $3,%ebx + rorl $2,%edi + xorl %esi,%ebx + xorl %ecx,%edi + rorl $17,%edi + shrl $10,%ecx + addl 156(%esp),%ebx + xorl %ecx,%edi + addl 120(%esp),%ebx + movl %edx,%ecx + addl %edi,%ebx + rorl $14,%ecx + movl 20(%esp),%esi + xorl %edx,%ecx + rorl $5,%ecx + movl %ebx,92(%esp) + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx + xorl %edi,%esi + movl %edx,16(%esp) + movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi + addl %esi,%ebx + rorl $9,%ecx + addl 28(%esp),%ebx + xorl %eax,%ecx + rorl $11,%ecx + movl 4(%esp),%esi + xorl %eax,%ecx + rorl $2,%ecx + addl %ebx,%edx + movl 8(%esp),%edi + addl %ecx,%ebx + movl %eax,(%esp) + movl %eax,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax + movl (%ebp),%esi + orl %ecx,%eax + addl $4,%ebp + addl %ebx,%eax + movl 152(%esp),%ebx + addl %esi,%edx + addl %esi,%eax + cmpl $3329325298,%esi + jne .L00416_63 + movl 352(%esp),%esi + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edi + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%ecx + addl 12(%esi),%edi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edi,12(%esi) + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 356(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + addl $352,%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573 +.long 961987163,1508970993,2453635748,2870763221 +.long 3624381080,310598401,607225278,1426881987 +.long 1925078388,2162078206,2614888103,3248222580 +.long 3835390401,4022224774,264347078,604807628 +.long 770255983,1249150122,1555081692,1996064986 +.long 2554220882,2821834349,2952996808,3210313671 +.long 3336571891,3584528711,113926993,338241895 +.long 666307205,773529912,1294757372,1396182291 +.long 1695183700,1986661051,2177026350,2456956037 +.long 2730485921,2820302411,3259730800,3345764771 +.long 3516065817,3600352804,4094571909,275423344 +.long 430227734,506948616,659060556,883997877 +.long 958139571,1322822218,1537002063,1747873779 +.long 1955562222,2024104815,2227730452,2361852424 +.long 2428436474,2756734187,3204031479,3329325298 +.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 diff --git a/app/openssl/crypto/sha/asm/sha256-586.pl b/app/openssl/crypto/sha/asm/sha256-586.pl index ecc8b69c..52a7c7f8 100644 --- a/app/openssl/crypto/sha/asm/sha256-586.pl +++ b/app/openssl/crypto/sha/asm/sha256-586.pl @@ -14,14 +14,14 @@ # Pentium PIII P4 AMD K8 Core2 # gcc 46 36 41 27 26 # icc 57 33 38 25 23 -# x86 asm 40 30 35 20 20 -# x86_64 asm(*) - - 21 15.8 16.5 +# x86 asm 40 30 33 20 18 +# x86_64 asm(*) - - 21 16 16 # # (*) x86_64 assembler performance is presented for reference # purposes. # # Performance improvement over compiler generated code varies from -# 10% to 40% [see above]. Not very impressive on some µ-archs, but +# 10% to 40% [see above]. Not very impressive on some µ-archs, but # it's 5 times smaller and optimizies amount of writes. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; @@ -48,20 +48,19 @@ sub BODY_00_15() { my $in_16_63=shift; &mov ("ecx",$E); - &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7] - &ror ("ecx",6); - &mov ("edi",$E); - &ror ("edi",11); + &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2]) + &ror ("ecx",25-11); &mov ("esi",$Foff); - &xor ("ecx","edi"); - &ror ("edi",25-11); + &xor ("ecx",$E); + &ror ("ecx",11-6); &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0] - &xor ("ecx","edi"); # Sigma1(e) + &xor ("ecx",$E); + &ror ("ecx",6); # Sigma1(e) &mov ("edi",$Goff); &add ($T,"ecx"); # T += Sigma1(e) - &mov ($Eoff,$E); # modulo-scheduled &xor ("esi","edi"); + &mov ($Eoff,$E); # modulo-scheduled &mov ("ecx",$A); &and ("esi",$E); &mov ($E,$Doff); # e becomes d, which is e in next iteration @@ -69,14 +68,14 @@ sub BODY_00_15() { &mov ("edi",$A); &add ($T,"esi"); # T += Ch(e,f,g) - &ror ("ecx",2); + &ror ("ecx",22-13); &add ($T,$Hoff); # T += h - &ror ("edi",13); + &xor ("ecx",$A); + &ror ("ecx",13-2); &mov ("esi",$Boff); - &xor ("ecx","edi"); - &ror ("edi",22-13); + &xor ("ecx",$A); + &ror ("ecx",2); # Sigma0(a) &add ($E,$T); # d += T - &xor ("ecx","edi"); # Sigma0(a) &mov ("edi",$Coff); &add ($T,"ecx"); # T += Sigma0(a) @@ -168,23 +167,22 @@ sub BODY_00_15() { &set_label("16_63",16); &mov ("esi",$T); &mov ("ecx",&DWP(4*(8+15+16-14),"esp")); - &shr ($T,3); - &ror ("esi",7); - &xor ($T,"esi"); &ror ("esi",18-7); &mov ("edi","ecx"); - &xor ($T,"esi"); # T = sigma0(X[-15]) + &xor ("esi",$T); + &ror ("esi",7); + &shr ($T,3); - &shr ("ecx",10); - &mov ("esi",&DWP(4*(8+15+16),"esp")); - &ror ("edi",17); - &xor ("ecx","edi"); &ror ("edi",19-17); - &add ($T,"esi"); # T += X[-16] - &xor ("edi","ecx") # sigma1(X[-2]) + &xor ($T,"esi"); # T = sigma0(X[-15]) + &xor ("edi","ecx"); + &ror ("edi",17); + &shr ("ecx",10); + &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16] + &xor ("edi","ecx"); # sigma1(X[-2]) - &add ($T,"edi"); # T += sigma1(X[-2]) - # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1) + &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7] + # &add ($T,"edi"); # T += sigma1(X[-2]) # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0] &BODY_00_15(1); diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.S b/app/openssl/crypto/sha/asm/sha256-armv4.S new file mode 120000 index 00000000..d4f53c1d --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha256-armv4.S @@ -0,0 +1 @@ +sha256-armv4.s \ No newline at end of file diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.pl b/app/openssl/crypto/sha/asm/sha256-armv4.pl index 492cb62b..9c84e8d9 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha256-armv4.pl @@ -18,11 +18,16 @@ # Rescheduling for dual-issue pipeline resulted in 22% improvement on # Cortex A8 core and ~20 cycles per processed byte. +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~17 cycles per processed byte. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $ctx="r0"; $t0="r0"; -$inp="r1"; +$inp="r1"; $t3="r1"; $len="r2"; $t1="r2"; $T1="r3"; $A="r4"; @@ -46,6 +51,9 @@ sub BODY_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___ if ($i<16); +#if __ARM_ARCH__>=7 + ldr $T1,[$inp],#4 +#else ldrb $T1,[$inp,#3] @ $i ldrb $t2,[$inp,#2] ldrb $t1,[$inp,#1] @@ -53,16 +61,24 @@ $code.=<<___ if ($i<16); orr $T1,$T1,$t2,lsl#8 orr $T1,$T1,$t1,lsl#16 orr $T1,$T1,$t0,lsl#24 - `"str $inp,[sp,#17*4]" if ($i==15)` +#endif ___ $code.=<<___; - ldr $t2,[$Ktbl],#4 @ *K256++ mov $t0,$e,ror#$Sigma1[0] - str $T1,[sp,#`$i%16`*4] + ldr $t2,[$Ktbl],#4 @ *K256++ eor $t0,$t0,$e,ror#$Sigma1[1] eor $t1,$f,$g +#if $i>=16 + add $T1,$T1,$t3 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev $T1,$T1 +#endif +#if $i==15 + str $inp,[sp,#17*4] @ leave room for $t3 +#endif eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) and $t1,$t1,$e + str $T1,[sp,#`$i%16`*4] add $T1,$T1,$t0 eor $t1,$t1,$g @ Ch(e,f,g) add $T1,$T1,$h @@ -71,6 +87,9 @@ $code.=<<___; eor $h,$h,$a,ror#$Sigma0[1] add $T1,$T1,$t2 eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) +#if $i>=15 + ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx +#endif orr $t0,$a,$b and $t1,$a,$b and $t0,$t0,$c @@ -85,24 +104,26 @@ sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i ldr $t2,[sp,#`($i+14)%16`*4] + mov $t0,$t3,ror#$sigma0[0] ldr $T1,[sp,#`($i+0)%16`*4] - mov $t0,$t1,ror#$sigma0[0] - ldr $inp,[sp,#`($i+9)%16`*4] - eor $t0,$t0,$t1,ror#$sigma0[1] - eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) - mov $t1,$t2,ror#$sigma1[0] + eor $t0,$t0,$t3,ror#$sigma0[1] + ldr $t1,[sp,#`($i+9)%16`*4] + eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1]) + mov $t3,$t2,ror#$sigma1[0] add $T1,$T1,$t0 - eor $t1,$t1,$t2,ror#$sigma1[1] - add $T1,$T1,$inp - eor $t1,$t1,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) + eor $t3,$t3,$t2,ror#$sigma1[1] add $T1,$T1,$t1 + eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) + @ add $T1,$T1,$t3 ___ &BODY_00_15(@_); } $code=<<___; +#include "arm_arch.h" + .text .code 32 @@ -132,7 +153,7 @@ K256: sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add $len,$inp,$len,lsl#6 @ len to point at the end of inp - stmdb sp!,{$ctx,$inp,$len,r4-r12,lr} + stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} sub $Ktbl,r3,#256 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) @@ -171,10 +192,14 @@ $code.=<<___; bne .Loop add sp,sp,#`16+3`*4 @ destroy frame - ldmia sp!,{r4-r12,lr} +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size sha256_block_data_order,.-sha256_block_data_order .asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " .align 2 diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.s b/app/openssl/crypto/sha/asm/sha256-armv4.s index ee903dc4..9c20a63c 100644 --- a/app/openssl/crypto/sha/asm/sha256-armv4.s +++ b/app/openssl/crypto/sha/asm/sha256-armv4.s @@ -1,3 +1,5 @@ +#include "arm_arch.h" + .text .code 32 @@ -27,11 +29,14 @@ K256: sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add r2,r1,r2,lsl#6 @ len to point at the end of inp - stmdb sp!,{r0,r1,r2,r4-r12,lr} + stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} sub r14,r3,#256 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 0 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -39,14 +44,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r8,ror#6 - str r3,[sp,#0*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r8,ror#11 eor r2,r9,r10 +#if 0>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 0==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 + str r3,[sp,#0*4] add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) add r3,r3,r11 @@ -55,6 +68,9 @@ sha256_block_data_order: eor r11,r11,r4,ror#13 add r3,r3,r12 eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 0>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif orr r0,r4,r5 and r2,r4,r5 and r0,r0,r6 @@ -62,6 +78,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r7,r7,r3 add r11,r11,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 1 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -69,14 +88,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r7,ror#6 - str r3,[sp,#1*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r7,ror#11 eor r2,r8,r9 +#if 1>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 1==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 + str r3,[sp,#1*4] add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) add r3,r3,r10 @@ -85,6 +112,9 @@ sha256_block_data_order: eor r10,r10,r11,ror#13 add r3,r3,r12 eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 1>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif orr r0,r11,r4 and r2,r11,r4 and r0,r0,r5 @@ -92,6 +122,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r6,r6,r3 add r10,r10,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 2 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -99,14 +132,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r6,ror#6 - str r3,[sp,#2*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r6,ror#11 eor r2,r7,r8 +#if 2>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 2==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 + str r3,[sp,#2*4] add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) add r3,r3,r9 @@ -115,6 +156,9 @@ sha256_block_data_order: eor r9,r9,r10,ror#13 add r3,r3,r12 eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 2>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif orr r0,r10,r11 and r2,r10,r11 and r0,r0,r4 @@ -122,6 +166,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r5,r5,r3 add r9,r9,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 3 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -129,14 +176,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r5,ror#6 - str r3,[sp,#3*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r5,ror#11 eor r2,r6,r7 +#if 3>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 3==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 + str r3,[sp,#3*4] add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) add r3,r3,r8 @@ -145,6 +200,9 @@ sha256_block_data_order: eor r8,r8,r9,ror#13 add r3,r3,r12 eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 3>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif orr r0,r9,r10 and r2,r9,r10 and r0,r0,r11 @@ -152,6 +210,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r4,r4,r3 add r8,r8,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 4 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -159,14 +220,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r4,ror#6 - str r3,[sp,#4*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r4,ror#11 eor r2,r5,r6 +#if 4>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 4==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 + str r3,[sp,#4*4] add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) add r3,r3,r7 @@ -175,6 +244,9 @@ sha256_block_data_order: eor r7,r7,r8,ror#13 add r3,r3,r12 eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 4>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif orr r0,r8,r9 and r2,r8,r9 and r0,r0,r10 @@ -182,6 +254,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r11,r11,r3 add r7,r7,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 5 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -189,14 +264,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r11,ror#6 - str r3,[sp,#5*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r11,ror#11 eor r2,r4,r5 +#if 5>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 5==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 + str r3,[sp,#5*4] add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) add r3,r3,r6 @@ -205,6 +288,9 @@ sha256_block_data_order: eor r6,r6,r7,ror#13 add r3,r3,r12 eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 5>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif orr r0,r7,r8 and r2,r7,r8 and r0,r0,r9 @@ -212,6 +298,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r10,r10,r3 add r6,r6,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 6 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -219,14 +308,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r10,ror#6 - str r3,[sp,#6*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r10,ror#11 eor r2,r11,r4 +#if 6>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 6==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 + str r3,[sp,#6*4] add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) add r3,r3,r5 @@ -235,6 +332,9 @@ sha256_block_data_order: eor r5,r5,r6,ror#13 add r3,r3,r12 eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 6>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif orr r0,r6,r7 and r2,r6,r7 and r0,r0,r8 @@ -242,6 +342,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r9,r9,r3 add r5,r5,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 7 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -249,14 +352,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r9,ror#6 - str r3,[sp,#7*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r9,ror#11 eor r2,r10,r11 +#if 7>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 7==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 + str r3,[sp,#7*4] add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) add r3,r3,r4 @@ -265,6 +376,9 @@ sha256_block_data_order: eor r4,r4,r5,ror#13 add r3,r3,r12 eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 7>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif orr r0,r5,r6 and r2,r5,r6 and r0,r0,r7 @@ -272,6 +386,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r8,r8,r3 add r4,r4,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 8 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -279,14 +396,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r8,ror#6 - str r3,[sp,#8*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r8,ror#11 eor r2,r9,r10 +#if 8>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 8==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 + str r3,[sp,#8*4] add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) add r3,r3,r11 @@ -295,6 +420,9 @@ sha256_block_data_order: eor r11,r11,r4,ror#13 add r3,r3,r12 eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 8>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif orr r0,r4,r5 and r2,r4,r5 and r0,r0,r6 @@ -302,6 +430,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r7,r7,r3 add r11,r11,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 9 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -309,14 +440,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r7,ror#6 - str r3,[sp,#9*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r7,ror#11 eor r2,r8,r9 +#if 9>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 9==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 + str r3,[sp,#9*4] add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) add r3,r3,r10 @@ -325,6 +464,9 @@ sha256_block_data_order: eor r10,r10,r11,ror#13 add r3,r3,r12 eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 9>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif orr r0,r11,r4 and r2,r11,r4 and r0,r0,r5 @@ -332,6 +474,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r6,r6,r3 add r10,r10,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 10 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -339,14 +484,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r6,ror#6 - str r3,[sp,#10*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r6,ror#11 eor r2,r7,r8 +#if 10>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 10==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 + str r3,[sp,#10*4] add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) add r3,r3,r9 @@ -355,6 +508,9 @@ sha256_block_data_order: eor r9,r9,r10,ror#13 add r3,r3,r12 eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 10>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif orr r0,r10,r11 and r2,r10,r11 and r0,r0,r4 @@ -362,6 +518,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r5,r5,r3 add r9,r9,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 11 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -369,14 +528,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r5,ror#6 - str r3,[sp,#11*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r5,ror#11 eor r2,r6,r7 +#if 11>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 11==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 + str r3,[sp,#11*4] add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) add r3,r3,r8 @@ -385,6 +552,9 @@ sha256_block_data_order: eor r8,r8,r9,ror#13 add r3,r3,r12 eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 11>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif orr r0,r9,r10 and r2,r9,r10 and r0,r0,r11 @@ -392,6 +562,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r4,r4,r3 add r8,r8,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 12 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -399,14 +572,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r4,ror#6 - str r3,[sp,#12*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r4,ror#11 eor r2,r5,r6 +#if 12>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 12==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 + str r3,[sp,#12*4] add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) add r3,r3,r7 @@ -415,6 +596,9 @@ sha256_block_data_order: eor r7,r7,r8,ror#13 add r3,r3,r12 eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 12>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif orr r0,r8,r9 and r2,r8,r9 and r0,r0,r10 @@ -422,6 +606,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r11,r11,r3 add r7,r7,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 13 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -429,14 +616,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r11,ror#6 - str r3,[sp,#13*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r11,ror#11 eor r2,r4,r5 +#if 13>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 13==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 + str r3,[sp,#13*4] add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) add r3,r3,r6 @@ -445,6 +640,9 @@ sha256_block_data_order: eor r6,r6,r7,ror#13 add r3,r3,r12 eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 13>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif orr r0,r7,r8 and r2,r7,r8 and r0,r0,r9 @@ -452,6 +650,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r10,r10,r3 add r6,r6,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 14 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -459,14 +660,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r10,ror#6 - str r3,[sp,#14*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r10,ror#11 eor r2,r11,r4 +#if 14>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 14==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 + str r3,[sp,#14*4] add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) add r3,r3,r5 @@ -475,6 +684,9 @@ sha256_block_data_order: eor r5,r5,r6,ror#13 add r3,r3,r12 eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 14>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif orr r0,r6,r7 and r2,r6,r7 and r0,r0,r8 @@ -482,6 +694,9 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r9,r9,r3 add r5,r5,r0 +#if __ARM_ARCH__>=7 + ldr r3,[r1],#4 +#else ldrb r3,[r1,#3] @ 15 ldrb r12,[r1,#2] ldrb r2,[r1,#1] @@ -489,14 +704,22 @@ sha256_block_data_order: orr r3,r3,r12,lsl#8 orr r3,r3,r2,lsl#16 orr r3,r3,r0,lsl#24 - str r1,[sp,#17*4] - ldr r12,[r14],#4 @ *K256++ +#endif mov r0,r9,ror#6 - str r3,[sp,#15*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r9,ror#11 eor r2,r10,r11 +#if 15>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 15==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 + str r3,[sp,#15*4] add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) add r3,r3,r4 @@ -505,6 +728,9 @@ sha256_block_data_order: eor r4,r4,r5,ror#13 add r3,r3,r12 eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 15>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif orr r0,r5,r6 and r2,r5,r6 and r0,r0,r7 @@ -513,26 +739,34 @@ sha256_block_data_order: add r8,r8,r3 add r4,r4,r0 .Lrounds_16_xx: - ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#1*4] @ 16 ldr r12,[sp,#14*4] + mov r0,r1,ror#7 ldr r3,[sp,#0*4] - mov r0,r2,ror#7 - ldr r1,[sp,#9*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#9*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r8,ror#6 - str r3,[sp,#0*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r8,ror#11 eor r2,r9,r10 +#if 16>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 16==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 + str r3,[sp,#0*4] add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) add r3,r3,r11 @@ -541,6 +775,9 @@ sha256_block_data_order: eor r11,r11,r4,ror#13 add r3,r3,r12 eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 16>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif orr r0,r4,r5 and r2,r4,r5 and r0,r0,r6 @@ -548,26 +785,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r7,r7,r3 add r11,r11,r0 - ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#2*4] @ 17 ldr r12,[sp,#15*4] + mov r0,r1,ror#7 ldr r3,[sp,#1*4] - mov r0,r2,ror#7 - ldr r1,[sp,#10*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#10*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r7,ror#6 - str r3,[sp,#1*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r7,ror#11 eor r2,r8,r9 +#if 17>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 17==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 + str r3,[sp,#1*4] add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) add r3,r3,r10 @@ -576,6 +821,9 @@ sha256_block_data_order: eor r10,r10,r11,ror#13 add r3,r3,r12 eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 17>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif orr r0,r11,r4 and r2,r11,r4 and r0,r0,r5 @@ -583,26 +831,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r6,r6,r3 add r10,r10,r0 - ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#3*4] @ 18 ldr r12,[sp,#0*4] + mov r0,r1,ror#7 ldr r3,[sp,#2*4] - mov r0,r2,ror#7 - ldr r1,[sp,#11*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#11*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r6,ror#6 - str r3,[sp,#2*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r6,ror#11 eor r2,r7,r8 +#if 18>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 18==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 + str r3,[sp,#2*4] add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) add r3,r3,r9 @@ -611,6 +867,9 @@ sha256_block_data_order: eor r9,r9,r10,ror#13 add r3,r3,r12 eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 18>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif orr r0,r10,r11 and r2,r10,r11 and r0,r0,r4 @@ -618,26 +877,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r5,r5,r3 add r9,r9,r0 - ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#4*4] @ 19 ldr r12,[sp,#1*4] + mov r0,r1,ror#7 ldr r3,[sp,#3*4] - mov r0,r2,ror#7 - ldr r1,[sp,#12*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#12*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r5,ror#6 - str r3,[sp,#3*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r5,ror#11 eor r2,r6,r7 +#if 19>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 19==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 + str r3,[sp,#3*4] add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) add r3,r3,r8 @@ -646,6 +913,9 @@ sha256_block_data_order: eor r8,r8,r9,ror#13 add r3,r3,r12 eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 19>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif orr r0,r9,r10 and r2,r9,r10 and r0,r0,r11 @@ -653,26 +923,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r4,r4,r3 add r8,r8,r0 - ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#5*4] @ 20 ldr r12,[sp,#2*4] + mov r0,r1,ror#7 ldr r3,[sp,#4*4] - mov r0,r2,ror#7 - ldr r1,[sp,#13*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#13*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r4,ror#6 - str r3,[sp,#4*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r4,ror#11 eor r2,r5,r6 +#if 20>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 20==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 + str r3,[sp,#4*4] add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) add r3,r3,r7 @@ -681,6 +959,9 @@ sha256_block_data_order: eor r7,r7,r8,ror#13 add r3,r3,r12 eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 20>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif orr r0,r8,r9 and r2,r8,r9 and r0,r0,r10 @@ -688,26 +969,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r11,r11,r3 add r7,r7,r0 - ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#6*4] @ 21 ldr r12,[sp,#3*4] + mov r0,r1,ror#7 ldr r3,[sp,#5*4] - mov r0,r2,ror#7 - ldr r1,[sp,#14*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#14*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r11,ror#6 - str r3,[sp,#5*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r11,ror#11 eor r2,r4,r5 +#if 21>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 21==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 + str r3,[sp,#5*4] add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) add r3,r3,r6 @@ -716,6 +1005,9 @@ sha256_block_data_order: eor r6,r6,r7,ror#13 add r3,r3,r12 eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 21>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif orr r0,r7,r8 and r2,r7,r8 and r0,r0,r9 @@ -723,26 +1015,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r10,r10,r3 add r6,r6,r0 - ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#7*4] @ 22 ldr r12,[sp,#4*4] + mov r0,r1,ror#7 ldr r3,[sp,#6*4] - mov r0,r2,ror#7 - ldr r1,[sp,#15*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#15*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r10,ror#6 - str r3,[sp,#6*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r10,ror#11 eor r2,r11,r4 +#if 22>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 22==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 + str r3,[sp,#6*4] add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) add r3,r3,r5 @@ -751,6 +1051,9 @@ sha256_block_data_order: eor r5,r5,r6,ror#13 add r3,r3,r12 eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 22>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif orr r0,r6,r7 and r2,r6,r7 and r0,r0,r8 @@ -758,26 +1061,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r9,r9,r3 add r5,r5,r0 - ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#8*4] @ 23 ldr r12,[sp,#5*4] + mov r0,r1,ror#7 ldr r3,[sp,#7*4] - mov r0,r2,ror#7 - ldr r1,[sp,#0*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#0*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r9,ror#6 - str r3,[sp,#7*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r9,ror#11 eor r2,r10,r11 +#if 23>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 23==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 + str r3,[sp,#7*4] add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) add r3,r3,r4 @@ -786,6 +1097,9 @@ sha256_block_data_order: eor r4,r4,r5,ror#13 add r3,r3,r12 eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 23>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif orr r0,r5,r6 and r2,r5,r6 and r0,r0,r7 @@ -793,26 +1107,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r8,r8,r3 add r4,r4,r0 - ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#9*4] @ 24 ldr r12,[sp,#6*4] + mov r0,r1,ror#7 ldr r3,[sp,#8*4] - mov r0,r2,ror#7 - ldr r1,[sp,#1*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#1*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r8,ror#6 - str r3,[sp,#8*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r8,ror#11 eor r2,r9,r10 +#if 24>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 24==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r8,ror#25 @ Sigma1(e) and r2,r2,r8 + str r3,[sp,#8*4] add r3,r3,r0 eor r2,r2,r10 @ Ch(e,f,g) add r3,r3,r11 @@ -821,6 +1143,9 @@ sha256_block_data_order: eor r11,r11,r4,ror#13 add r3,r3,r12 eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 24>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif orr r0,r4,r5 and r2,r4,r5 and r0,r0,r6 @@ -828,26 +1153,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r7,r7,r3 add r11,r11,r0 - ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#10*4] @ 25 ldr r12,[sp,#7*4] + mov r0,r1,ror#7 ldr r3,[sp,#9*4] - mov r0,r2,ror#7 - ldr r1,[sp,#2*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#2*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r7,ror#6 - str r3,[sp,#9*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r7,ror#11 eor r2,r8,r9 +#if 25>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 25==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r7,ror#25 @ Sigma1(e) and r2,r2,r7 + str r3,[sp,#9*4] add r3,r3,r0 eor r2,r2,r9 @ Ch(e,f,g) add r3,r3,r10 @@ -856,6 +1189,9 @@ sha256_block_data_order: eor r10,r10,r11,ror#13 add r3,r3,r12 eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 25>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif orr r0,r11,r4 and r2,r11,r4 and r0,r0,r5 @@ -863,26 +1199,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r6,r6,r3 add r10,r10,r0 - ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#11*4] @ 26 ldr r12,[sp,#8*4] + mov r0,r1,ror#7 ldr r3,[sp,#10*4] - mov r0,r2,ror#7 - ldr r1,[sp,#3*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#3*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r6,ror#6 - str r3,[sp,#10*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r6,ror#11 eor r2,r7,r8 +#if 26>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 26==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r6,ror#25 @ Sigma1(e) and r2,r2,r6 + str r3,[sp,#10*4] add r3,r3,r0 eor r2,r2,r8 @ Ch(e,f,g) add r3,r3,r9 @@ -891,6 +1235,9 @@ sha256_block_data_order: eor r9,r9,r10,ror#13 add r3,r3,r12 eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 26>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif orr r0,r10,r11 and r2,r10,r11 and r0,r0,r4 @@ -898,26 +1245,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r5,r5,r3 add r9,r9,r0 - ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#12*4] @ 27 ldr r12,[sp,#9*4] + mov r0,r1,ror#7 ldr r3,[sp,#11*4] - mov r0,r2,ror#7 - ldr r1,[sp,#4*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#4*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r5,ror#6 - str r3,[sp,#11*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r5,ror#11 eor r2,r6,r7 +#if 27>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 27==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r5,ror#25 @ Sigma1(e) and r2,r2,r5 + str r3,[sp,#11*4] add r3,r3,r0 eor r2,r2,r7 @ Ch(e,f,g) add r3,r3,r8 @@ -926,6 +1281,9 @@ sha256_block_data_order: eor r8,r8,r9,ror#13 add r3,r3,r12 eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 27>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif orr r0,r9,r10 and r2,r9,r10 and r0,r0,r11 @@ -933,26 +1291,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r4,r4,r3 add r8,r8,r0 - ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#13*4] @ 28 ldr r12,[sp,#10*4] + mov r0,r1,ror#7 ldr r3,[sp,#12*4] - mov r0,r2,ror#7 - ldr r1,[sp,#5*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#5*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r4,ror#6 - str r3,[sp,#12*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r4,ror#11 eor r2,r5,r6 +#if 28>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 28==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r4,ror#25 @ Sigma1(e) and r2,r2,r4 + str r3,[sp,#12*4] add r3,r3,r0 eor r2,r2,r6 @ Ch(e,f,g) add r3,r3,r7 @@ -961,6 +1327,9 @@ sha256_block_data_order: eor r7,r7,r8,ror#13 add r3,r3,r12 eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 28>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif orr r0,r8,r9 and r2,r8,r9 and r0,r0,r10 @@ -968,26 +1337,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r11,r11,r3 add r7,r7,r0 - ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#14*4] @ 29 ldr r12,[sp,#11*4] + mov r0,r1,ror#7 ldr r3,[sp,#13*4] - mov r0,r2,ror#7 - ldr r1,[sp,#6*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#6*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r11,ror#6 - str r3,[sp,#13*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r11,ror#11 eor r2,r4,r5 +#if 29>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 29==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r11,ror#25 @ Sigma1(e) and r2,r2,r11 + str r3,[sp,#13*4] add r3,r3,r0 eor r2,r2,r5 @ Ch(e,f,g) add r3,r3,r6 @@ -996,6 +1373,9 @@ sha256_block_data_order: eor r6,r6,r7,ror#13 add r3,r3,r12 eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 29>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif orr r0,r7,r8 and r2,r7,r8 and r0,r0,r9 @@ -1003,26 +1383,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r10,r10,r3 add r6,r6,r0 - ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#15*4] @ 30 ldr r12,[sp,#12*4] + mov r0,r1,ror#7 ldr r3,[sp,#14*4] - mov r0,r2,ror#7 - ldr r1,[sp,#7*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#7*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r10,ror#6 - str r3,[sp,#14*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r10,ror#11 eor r2,r11,r4 +#if 30>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 30==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r10,ror#25 @ Sigma1(e) and r2,r2,r10 + str r3,[sp,#14*4] add r3,r3,r0 eor r2,r2,r4 @ Ch(e,f,g) add r3,r3,r5 @@ -1031,6 +1419,9 @@ sha256_block_data_order: eor r5,r5,r6,ror#13 add r3,r3,r12 eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 30>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif orr r0,r6,r7 and r2,r6,r7 and r0,r0,r8 @@ -1038,26 +1429,34 @@ sha256_block_data_order: orr r0,r0,r2 @ Maj(a,b,c) add r9,r9,r3 add r5,r5,r0 - ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#0*4] @ 31 ldr r12,[sp,#13*4] + mov r0,r1,ror#7 ldr r3,[sp,#15*4] - mov r0,r2,ror#7 - ldr r1,[sp,#8*4] - eor r0,r0,r2,ror#18 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - mov r2,r12,ror#17 + eor r0,r0,r1,ror#18 + ldr r2,[sp,#8*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 add r3,r3,r0 - eor r2,r2,r12,ror#19 - add r3,r3,r1 - eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) + eor r1,r1,r12,ror#19 add r3,r3,r2 - ldr r12,[r14],#4 @ *K256++ + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 mov r0,r9,ror#6 - str r3,[sp,#15*4] + ldr r12,[r14],#4 @ *K256++ eor r0,r0,r9,ror#11 eor r2,r10,r11 +#if 31>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r3,r3 +#endif +#if 31==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif eor r0,r0,r9,ror#25 @ Sigma1(e) and r2,r2,r9 + str r3,[sp,#15*4] add r3,r3,r0 eor r2,r2,r11 @ Ch(e,f,g) add r3,r3,r4 @@ -1066,6 +1465,9 @@ sha256_block_data_order: eor r4,r4,r5,ror#13 add r3,r3,r12 eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 31>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif orr r0,r5,r6 and r2,r5,r6 and r0,r0,r7 @@ -1102,10 +1504,14 @@ sha256_block_data_order: bne .Loop add sp,sp,#19*4 @ destroy frame - ldmia sp!,{r4-r12,lr} +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size sha256_block_data_order,.-sha256_block_data_order .asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " .align 2 diff --git a/app/openssl/crypto/sha/asm/sha256-mips.S b/app/openssl/crypto/sha/asm/sha256-mips.S new file mode 100644 index 00000000..2bd728e9 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha256-mips.S @@ -0,0 +1,1998 @@ +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +.text +.set noat +#if !defined(__vxworks) || defined(__pic__) +.option pic2 +#endif + +.align 5 +.globl sha256_block_data_order +.ent sha256_block_data_order +sha256_block_data_order: + .frame $29,128,$31 + .mask 3237937152,-4 + .set noreorder + .cpload $25 + sub $29,128 + sw $31,128-1*4($29) + sw $30,128-2*4($29) + sw $23,128-3*4($29) + sw $22,128-4*4($29) + sw $21,128-5*4($29) + sw $20,128-6*4($29) + sw $19,128-7*4($29) + sw $18,128-8*4($29) + sw $17,128-9*4($29) + sw $16,128-10*4($29) + sll $23,$6,6 + .set reorder + la $6,K256 # PIC-ified 'load address' + + lw $1,0*4($4) # load context + lw $2,1*4($4) + lw $3,2*4($4) + lw $7,3*4($4) + lw $24,4*4($4) + lw $25,5*4($4) + lw $30,6*4($4) + lw $31,7*4($4) + + add $23,$5 # pointer to the end of input + sw $23,16*4($29) + b .Loop + +.align 5 +.Loop: + lwl $8,3($5) + lwr $8,0($5) + lwl $9,7($5) + lwr $9,4($5) + srl $13,$8,24 # byte swap(0) + srl $14,$8,8 + andi $15,$8,0xFF00 + sll $8,$8,24 + andi $14,0xFF00 + sll $15,$15,8 + or $8,$13 + or $14,$15 + or $8,$14 + addu $12,$8,$31 # 0 + srl $31,$24,6 + xor $15,$25,$30 + sll $14,$24,7 + and $15,$24 + srl $13,$24,11 + xor $31,$14 + sll $14,$24,21 + xor $31,$13 + srl $13,$24,25 + xor $31,$14 + sll $14,$24,26 + xor $31,$13 + xor $15,$30 # Ch(e,f,g) + xor $13,$14,$31 # Sigma1(e) + + srl $31,$1,2 + addu $12,$15 + lw $15,0($6) # K[0] + sll $14,$1,10 + addu $12,$13 + srl $13,$1,13 + xor $31,$14 + sll $14,$1,19 + xor $31,$13 + srl $13,$1,22 + xor $31,$14 + sll $14,$1,30 + xor $31,$13 + sw $8,0($29) # offload to ring buffer + xor $31,$14 # Sigma0(a) + + or $13,$1,$2 + and $14,$1,$2 + and $13,$3 + or $14,$13 # Maj(a,b,c) + addu $12,$15 # +=K[0] + addu $31,$14 + + addu $7,$12 + addu $31,$12 + lwl $10,11($5) + lwr $10,8($5) + srl $14,$9,24 # byte swap(1) + srl $15,$9,8 + andi $16,$9,0xFF00 + sll $9,$9,24 + andi $15,0xFF00 + sll $16,$16,8 + or $9,$14 + or $15,$16 + or $9,$15 + addu $13,$9,$30 # 1 + srl $30,$7,6 + xor $16,$24,$25 + sll $15,$7,7 + and $16,$7 + srl $14,$7,11 + xor $30,$15 + sll $15,$7,21 + xor $30,$14 + srl $14,$7,25 + xor $30,$15 + sll $15,$7,26 + xor $30,$14 + xor $16,$25 # Ch(e,f,g) + xor $14,$15,$30 # Sigma1(e) + + srl $30,$31,2 + addu $13,$16 + lw $16,4($6) # K[1] + sll $15,$31,10 + addu $13,$14 + srl $14,$31,13 + xor $30,$15 + sll $15,$31,19 + xor $30,$14 + srl $14,$31,22 + xor $30,$15 + sll $15,$31,30 + xor $30,$14 + sw $9,4($29) # offload to ring buffer + xor $30,$15 # Sigma0(a) + + or $14,$31,$1 + and $15,$31,$1 + and $14,$2 + or $15,$14 # Maj(a,b,c) + addu $13,$16 # +=K[1] + addu $30,$15 + + addu $3,$13 + addu $30,$13 + lwl $11,15($5) + lwr $11,12($5) + srl $15,$10,24 # byte swap(2) + srl $16,$10,8 + andi $17,$10,0xFF00 + sll $10,$10,24 + andi $16,0xFF00 + sll $17,$17,8 + or $10,$15 + or $16,$17 + or $10,$16 + addu $14,$10,$25 # 2 + srl $25,$3,6 + xor $17,$7,$24 + sll $16,$3,7 + and $17,$3 + srl $15,$3,11 + xor $25,$16 + sll $16,$3,21 + xor $25,$15 + srl $15,$3,25 + xor $25,$16 + sll $16,$3,26 + xor $25,$15 + xor $17,$24 # Ch(e,f,g) + xor $15,$16,$25 # Sigma1(e) + + srl $25,$30,2 + addu $14,$17 + lw $17,8($6) # K[2] + sll $16,$30,10 + addu $14,$15 + srl $15,$30,13 + xor $25,$16 + sll $16,$30,19 + xor $25,$15 + srl $15,$30,22 + xor $25,$16 + sll $16,$30,30 + xor $25,$15 + sw $10,8($29) # offload to ring buffer + xor $25,$16 # Sigma0(a) + + or $15,$30,$31 + and $16,$30,$31 + and $15,$1 + or $16,$15 # Maj(a,b,c) + addu $14,$17 # +=K[2] + addu $25,$16 + + addu $2,$14 + addu $25,$14 + lwl $12,19($5) + lwr $12,16($5) + srl $16,$11,24 # byte swap(3) + srl $17,$11,8 + andi $18,$11,0xFF00 + sll $11,$11,24 + andi $17,0xFF00 + sll $18,$18,8 + or $11,$16 + or $17,$18 + or $11,$17 + addu $15,$11,$24 # 3 + srl $24,$2,6 + xor $18,$3,$7 + sll $17,$2,7 + and $18,$2 + srl $16,$2,11 + xor $24,$17 + sll $17,$2,21 + xor $24,$16 + srl $16,$2,25 + xor $24,$17 + sll $17,$2,26 + xor $24,$16 + xor $18,$7 # Ch(e,f,g) + xor $16,$17,$24 # Sigma1(e) + + srl $24,$25,2 + addu $15,$18 + lw $18,12($6) # K[3] + sll $17,$25,10 + addu $15,$16 + srl $16,$25,13 + xor $24,$17 + sll $17,$25,19 + xor $24,$16 + srl $16,$25,22 + xor $24,$17 + sll $17,$25,30 + xor $24,$16 + sw $11,12($29) # offload to ring buffer + xor $24,$17 # Sigma0(a) + + or $16,$25,$30 + and $17,$25,$30 + and $16,$31 + or $17,$16 # Maj(a,b,c) + addu $15,$18 # +=K[3] + addu $24,$17 + + addu $1,$15 + addu $24,$15 + lwl $13,23($5) + lwr $13,20($5) + srl $17,$12,24 # byte swap(4) + srl $18,$12,8 + andi $19,$12,0xFF00 + sll $12,$12,24 + andi $18,0xFF00 + sll $19,$19,8 + or $12,$17 + or $18,$19 + or $12,$18 + addu $16,$12,$7 # 4 + srl $7,$1,6 + xor $19,$2,$3 + sll $18,$1,7 + and $19,$1 + srl $17,$1,11 + xor $7,$18 + sll $18,$1,21 + xor $7,$17 + srl $17,$1,25 + xor $7,$18 + sll $18,$1,26 + xor $7,$17 + xor $19,$3 # Ch(e,f,g) + xor $17,$18,$7 # Sigma1(e) + + srl $7,$24,2 + addu $16,$19 + lw $19,16($6) # K[4] + sll $18,$24,10 + addu $16,$17 + srl $17,$24,13 + xor $7,$18 + sll $18,$24,19 + xor $7,$17 + srl $17,$24,22 + xor $7,$18 + sll $18,$24,30 + xor $7,$17 + sw $12,16($29) # offload to ring buffer + xor $7,$18 # Sigma0(a) + + or $17,$24,$25 + and $18,$24,$25 + and $17,$30 + or $18,$17 # Maj(a,b,c) + addu $16,$19 # +=K[4] + addu $7,$18 + + addu $31,$16 + addu $7,$16 + lwl $14,27($5) + lwr $14,24($5) + srl $18,$13,24 # byte swap(5) + srl $19,$13,8 + andi $20,$13,0xFF00 + sll $13,$13,24 + andi $19,0xFF00 + sll $20,$20,8 + or $13,$18 + or $19,$20 + or $13,$19 + addu $17,$13,$3 # 5 + srl $3,$31,6 + xor $20,$1,$2 + sll $19,$31,7 + and $20,$31 + srl $18,$31,11 + xor $3,$19 + sll $19,$31,21 + xor $3,$18 + srl $18,$31,25 + xor $3,$19 + sll $19,$31,26 + xor $3,$18 + xor $20,$2 # Ch(e,f,g) + xor $18,$19,$3 # Sigma1(e) + + srl $3,$7,2 + addu $17,$20 + lw $20,20($6) # K[5] + sll $19,$7,10 + addu $17,$18 + srl $18,$7,13 + xor $3,$19 + sll $19,$7,19 + xor $3,$18 + srl $18,$7,22 + xor $3,$19 + sll $19,$7,30 + xor $3,$18 + sw $13,20($29) # offload to ring buffer + xor $3,$19 # Sigma0(a) + + or $18,$7,$24 + and $19,$7,$24 + and $18,$25 + or $19,$18 # Maj(a,b,c) + addu $17,$20 # +=K[5] + addu $3,$19 + + addu $30,$17 + addu $3,$17 + lwl $15,31($5) + lwr $15,28($5) + srl $19,$14,24 # byte swap(6) + srl $20,$14,8 + andi $21,$14,0xFF00 + sll $14,$14,24 + andi $20,0xFF00 + sll $21,$21,8 + or $14,$19 + or $20,$21 + or $14,$20 + addu $18,$14,$2 # 6 + srl $2,$30,6 + xor $21,$31,$1 + sll $20,$30,7 + and $21,$30 + srl $19,$30,11 + xor $2,$20 + sll $20,$30,21 + xor $2,$19 + srl $19,$30,25 + xor $2,$20 + sll $20,$30,26 + xor $2,$19 + xor $21,$1 # Ch(e,f,g) + xor $19,$20,$2 # Sigma1(e) + + srl $2,$3,2 + addu $18,$21 + lw $21,24($6) # K[6] + sll $20,$3,10 + addu $18,$19 + srl $19,$3,13 + xor $2,$20 + sll $20,$3,19 + xor $2,$19 + srl $19,$3,22 + xor $2,$20 + sll $20,$3,30 + xor $2,$19 + sw $14,24($29) # offload to ring buffer + xor $2,$20 # Sigma0(a) + + or $19,$3,$7 + and $20,$3,$7 + and $19,$24 + or $20,$19 # Maj(a,b,c) + addu $18,$21 # +=K[6] + addu $2,$20 + + addu $25,$18 + addu $2,$18 + lwl $16,35($5) + lwr $16,32($5) + srl $20,$15,24 # byte swap(7) + srl $21,$15,8 + andi $22,$15,0xFF00 + sll $15,$15,24 + andi $21,0xFF00 + sll $22,$22,8 + or $15,$20 + or $21,$22 + or $15,$21 + addu $19,$15,$1 # 7 + srl $1,$25,6 + xor $22,$30,$31 + sll $21,$25,7 + and $22,$25 + srl $20,$25,11 + xor $1,$21 + sll $21,$25,21 + xor $1,$20 + srl $20,$25,25 + xor $1,$21 + sll $21,$25,26 + xor $1,$20 + xor $22,$31 # Ch(e,f,g) + xor $20,$21,$1 # Sigma1(e) + + srl $1,$2,2 + addu $19,$22 + lw $22,28($6) # K[7] + sll $21,$2,10 + addu $19,$20 + srl $20,$2,13 + xor $1,$21 + sll $21,$2,19 + xor $1,$20 + srl $20,$2,22 + xor $1,$21 + sll $21,$2,30 + xor $1,$20 + sw $15,28($29) # offload to ring buffer + xor $1,$21 # Sigma0(a) + + or $20,$2,$3 + and $21,$2,$3 + and $20,$7 + or $21,$20 # Maj(a,b,c) + addu $19,$22 # +=K[7] + addu $1,$21 + + addu $24,$19 + addu $1,$19 + lwl $17,39($5) + lwr $17,36($5) + srl $21,$16,24 # byte swap(8) + srl $22,$16,8 + andi $23,$16,0xFF00 + sll $16,$16,24 + andi $22,0xFF00 + sll $23,$23,8 + or $16,$21 + or $22,$23 + or $16,$22 + addu $20,$16,$31 # 8 + srl $31,$24,6 + xor $23,$25,$30 + sll $22,$24,7 + and $23,$24 + srl $21,$24,11 + xor $31,$22 + sll $22,$24,21 + xor $31,$21 + srl $21,$24,25 + xor $31,$22 + sll $22,$24,26 + xor $31,$21 + xor $23,$30 # Ch(e,f,g) + xor $21,$22,$31 # Sigma1(e) + + srl $31,$1,2 + addu $20,$23 + lw $23,32($6) # K[8] + sll $22,$1,10 + addu $20,$21 + srl $21,$1,13 + xor $31,$22 + sll $22,$1,19 + xor $31,$21 + srl $21,$1,22 + xor $31,$22 + sll $22,$1,30 + xor $31,$21 + sw $16,32($29) # offload to ring buffer + xor $31,$22 # Sigma0(a) + + or $21,$1,$2 + and $22,$1,$2 + and $21,$3 + or $22,$21 # Maj(a,b,c) + addu $20,$23 # +=K[8] + addu $31,$22 + + addu $7,$20 + addu $31,$20 + lwl $18,43($5) + lwr $18,40($5) + srl $22,$17,24 # byte swap(9) + srl $23,$17,8 + andi $8,$17,0xFF00 + sll $17,$17,24 + andi $23,0xFF00 + sll $8,$8,8 + or $17,$22 + or $23,$8 + or $17,$23 + addu $21,$17,$30 # 9 + srl $30,$7,6 + xor $8,$24,$25 + sll $23,$7,7 + and $8,$7 + srl $22,$7,11 + xor $30,$23 + sll $23,$7,21 + xor $30,$22 + srl $22,$7,25 + xor $30,$23 + sll $23,$7,26 + xor $30,$22 + xor $8,$25 # Ch(e,f,g) + xor $22,$23,$30 # Sigma1(e) + + srl $30,$31,2 + addu $21,$8 + lw $8,36($6) # K[9] + sll $23,$31,10 + addu $21,$22 + srl $22,$31,13 + xor $30,$23 + sll $23,$31,19 + xor $30,$22 + srl $22,$31,22 + xor $30,$23 + sll $23,$31,30 + xor $30,$22 + sw $17,36($29) # offload to ring buffer + xor $30,$23 # Sigma0(a) + + or $22,$31,$1 + and $23,$31,$1 + and $22,$2 + or $23,$22 # Maj(a,b,c) + addu $21,$8 # +=K[9] + addu $30,$23 + + addu $3,$21 + addu $30,$21 + lwl $19,47($5) + lwr $19,44($5) + srl $23,$18,24 # byte swap(10) + srl $8,$18,8 + andi $9,$18,0xFF00 + sll $18,$18,24 + andi $8,0xFF00 + sll $9,$9,8 + or $18,$23 + or $8,$9 + or $18,$8 + addu $22,$18,$25 # 10 + srl $25,$3,6 + xor $9,$7,$24 + sll $8,$3,7 + and $9,$3 + srl $23,$3,11 + xor $25,$8 + sll $8,$3,21 + xor $25,$23 + srl $23,$3,25 + xor $25,$8 + sll $8,$3,26 + xor $25,$23 + xor $9,$24 # Ch(e,f,g) + xor $23,$8,$25 # Sigma1(e) + + srl $25,$30,2 + addu $22,$9 + lw $9,40($6) # K[10] + sll $8,$30,10 + addu $22,$23 + srl $23,$30,13 + xor $25,$8 + sll $8,$30,19 + xor $25,$23 + srl $23,$30,22 + xor $25,$8 + sll $8,$30,30 + xor $25,$23 + sw $18,40($29) # offload to ring buffer + xor $25,$8 # Sigma0(a) + + or $23,$30,$31 + and $8,$30,$31 + and $23,$1 + or $8,$23 # Maj(a,b,c) + addu $22,$9 # +=K[10] + addu $25,$8 + + addu $2,$22 + addu $25,$22 + lwl $20,51($5) + lwr $20,48($5) + srl $8,$19,24 # byte swap(11) + srl $9,$19,8 + andi $10,$19,0xFF00 + sll $19,$19,24 + andi $9,0xFF00 + sll $10,$10,8 + or $19,$8 + or $9,$10 + or $19,$9 + addu $23,$19,$24 # 11 + srl $24,$2,6 + xor $10,$3,$7 + sll $9,$2,7 + and $10,$2 + srl $8,$2,11 + xor $24,$9 + sll $9,$2,21 + xor $24,$8 + srl $8,$2,25 + xor $24,$9 + sll $9,$2,26 + xor $24,$8 + xor $10,$7 # Ch(e,f,g) + xor $8,$9,$24 # Sigma1(e) + + srl $24,$25,2 + addu $23,$10 + lw $10,44($6) # K[11] + sll $9,$25,10 + addu $23,$8 + srl $8,$25,13 + xor $24,$9 + sll $9,$25,19 + xor $24,$8 + srl $8,$25,22 + xor $24,$9 + sll $9,$25,30 + xor $24,$8 + sw $19,44($29) # offload to ring buffer + xor $24,$9 # Sigma0(a) + + or $8,$25,$30 + and $9,$25,$30 + and $8,$31 + or $9,$8 # Maj(a,b,c) + addu $23,$10 # +=K[11] + addu $24,$9 + + addu $1,$23 + addu $24,$23 + lwl $21,55($5) + lwr $21,52($5) + srl $9,$20,24 # byte swap(12) + srl $10,$20,8 + andi $11,$20,0xFF00 + sll $20,$20,24 + andi $10,0xFF00 + sll $11,$11,8 + or $20,$9 + or $10,$11 + or $20,$10 + addu $8,$20,$7 # 12 + srl $7,$1,6 + xor $11,$2,$3 + sll $10,$1,7 + and $11,$1 + srl $9,$1,11 + xor $7,$10 + sll $10,$1,21 + xor $7,$9 + srl $9,$1,25 + xor $7,$10 + sll $10,$1,26 + xor $7,$9 + xor $11,$3 # Ch(e,f,g) + xor $9,$10,$7 # Sigma1(e) + + srl $7,$24,2 + addu $8,$11 + lw $11,48($6) # K[12] + sll $10,$24,10 + addu $8,$9 + srl $9,$24,13 + xor $7,$10 + sll $10,$24,19 + xor $7,$9 + srl $9,$24,22 + xor $7,$10 + sll $10,$24,30 + xor $7,$9 + sw $20,48($29) # offload to ring buffer + xor $7,$10 # Sigma0(a) + + or $9,$24,$25 + and $10,$24,$25 + and $9,$30 + or $10,$9 # Maj(a,b,c) + addu $8,$11 # +=K[12] + addu $7,$10 + + addu $31,$8 + addu $7,$8 + lwl $22,59($5) + lwr $22,56($5) + srl $10,$21,24 # byte swap(13) + srl $11,$21,8 + andi $12,$21,0xFF00 + sll $21,$21,24 + andi $11,0xFF00 + sll $12,$12,8 + or $21,$10 + or $11,$12 + or $21,$11 + addu $9,$21,$3 # 13 + srl $3,$31,6 + xor $12,$1,$2 + sll $11,$31,7 + and $12,$31 + srl $10,$31,11 + xor $3,$11 + sll $11,$31,21 + xor $3,$10 + srl $10,$31,25 + xor $3,$11 + sll $11,$31,26 + xor $3,$10 + xor $12,$2 # Ch(e,f,g) + xor $10,$11,$3 # Sigma1(e) + + srl $3,$7,2 + addu $9,$12 + lw $12,52($6) # K[13] + sll $11,$7,10 + addu $9,$10 + srl $10,$7,13 + xor $3,$11 + sll $11,$7,19 + xor $3,$10 + srl $10,$7,22 + xor $3,$11 + sll $11,$7,30 + xor $3,$10 + sw $21,52($29) # offload to ring buffer + xor $3,$11 # Sigma0(a) + + or $10,$7,$24 + and $11,$7,$24 + and $10,$25 + or $11,$10 # Maj(a,b,c) + addu $9,$12 # +=K[13] + addu $3,$11 + + addu $30,$9 + addu $3,$9 + lw $8,0($29) # prefetch from ring buffer + lwl $23,63($5) + lwr $23,60($5) + srl $11,$22,24 # byte swap(14) + srl $12,$22,8 + andi $13,$22,0xFF00 + sll $22,$22,24 + andi $12,0xFF00 + sll $13,$13,8 + or $22,$11 + or $12,$13 + or $22,$12 + addu $10,$22,$2 # 14 + srl $2,$30,6 + xor $13,$31,$1 + sll $12,$30,7 + and $13,$30 + srl $11,$30,11 + xor $2,$12 + sll $12,$30,21 + xor $2,$11 + srl $11,$30,25 + xor $2,$12 + sll $12,$30,26 + xor $2,$11 + xor $13,$1 # Ch(e,f,g) + xor $11,$12,$2 # Sigma1(e) + + srl $2,$3,2 + addu $10,$13 + lw $13,56($6) # K[14] + sll $12,$3,10 + addu $10,$11 + srl $11,$3,13 + xor $2,$12 + sll $12,$3,19 + xor $2,$11 + srl $11,$3,22 + xor $2,$12 + sll $12,$3,30 + xor $2,$11 + sw $22,56($29) # offload to ring buffer + xor $2,$12 # Sigma0(a) + + or $11,$3,$7 + and $12,$3,$7 + and $11,$24 + or $12,$11 # Maj(a,b,c) + addu $10,$13 # +=K[14] + addu $2,$12 + + addu $25,$10 + addu $2,$10 + lw $9,4($29) # prefetch from ring buffer + srl $12,$23,24 # byte swap(15) + srl $13,$23,8 + andi $14,$23,0xFF00 + sll $23,$23,24 + andi $13,0xFF00 + sll $14,$14,8 + or $23,$12 + or $13,$14 + or $23,$13 + addu $11,$23,$1 # 15 + srl $1,$25,6 + xor $14,$30,$31 + sll $13,$25,7 + and $14,$25 + srl $12,$25,11 + xor $1,$13 + sll $13,$25,21 + xor $1,$12 + srl $12,$25,25 + xor $1,$13 + sll $13,$25,26 + xor $1,$12 + xor $14,$31 # Ch(e,f,g) + xor $12,$13,$1 # Sigma1(e) + + srl $1,$2,2 + addu $11,$14 + lw $14,60($6) # K[15] + sll $13,$2,10 + addu $11,$12 + srl $12,$2,13 + xor $1,$13 + sll $13,$2,19 + xor $1,$12 + srl $12,$2,22 + xor $1,$13 + sll $13,$2,30 + xor $1,$12 + sw $23,60($29) # offload to ring buffer + xor $1,$13 # Sigma0(a) + + or $12,$2,$3 + and $13,$2,$3 + and $12,$7 + or $13,$12 # Maj(a,b,c) + addu $11,$14 # +=K[15] + addu $1,$13 + + addu $24,$11 + addu $1,$11 + lw $10,8($29) # prefetch from ring buffer + b .L16_xx +.align 4 +.L16_xx: + srl $14,$9,3 # Xupdate(16) + addu $8,$17 # +=X[i+9] + sll $13,$9,14 + srl $12,$9,7 + xor $14,$13 + sll $13,11 + xor $14,$12 + srl $12,$9,18 + xor $14,$13 + + srl $15,$22,10 + xor $14,$12 # sigma0(X[i+1]) + sll $13,$22,13 + addu $8,$14 + srl $12,$22,17 + xor $15,$13 + sll $13,2 + xor $15,$12 + srl $12,$22,19 + xor $15,$13 + + xor $15,$12 # sigma1(X[i+14]) + addu $8,$15 + addu $12,$8,$31 # 16 + srl $31,$24,6 + xor $15,$25,$30 + sll $14,$24,7 + and $15,$24 + srl $13,$24,11 + xor $31,$14 + sll $14,$24,21 + xor $31,$13 + srl $13,$24,25 + xor $31,$14 + sll $14,$24,26 + xor $31,$13 + xor $15,$30 # Ch(e,f,g) + xor $13,$14,$31 # Sigma1(e) + + srl $31,$1,2 + addu $12,$15 + lw $15,64($6) # K[16] + sll $14,$1,10 + addu $12,$13 + srl $13,$1,13 + xor $31,$14 + sll $14,$1,19 + xor $31,$13 + srl $13,$1,22 + xor $31,$14 + sll $14,$1,30 + xor $31,$13 + sw $8,0($29) # offload to ring buffer + xor $31,$14 # Sigma0(a) + + or $13,$1,$2 + and $14,$1,$2 + and $13,$3 + or $14,$13 # Maj(a,b,c) + addu $12,$15 # +=K[16] + addu $31,$14 + + addu $7,$12 + addu $31,$12 + lw $11,12($29) # prefetch from ring buffer + srl $15,$10,3 # Xupdate(17) + addu $9,$18 # +=X[i+9] + sll $14,$10,14 + srl $13,$10,7 + xor $15,$14 + sll $14,11 + xor $15,$13 + srl $13,$10,18 + xor $15,$14 + + srl $16,$23,10 + xor $15,$13 # sigma0(X[i+1]) + sll $14,$23,13 + addu $9,$15 + srl $13,$23,17 + xor $16,$14 + sll $14,2 + xor $16,$13 + srl $13,$23,19 + xor $16,$14 + + xor $16,$13 # sigma1(X[i+14]) + addu $9,$16 + addu $13,$9,$30 # 17 + srl $30,$7,6 + xor $16,$24,$25 + sll $15,$7,7 + and $16,$7 + srl $14,$7,11 + xor $30,$15 + sll $15,$7,21 + xor $30,$14 + srl $14,$7,25 + xor $30,$15 + sll $15,$7,26 + xor $30,$14 + xor $16,$25 # Ch(e,f,g) + xor $14,$15,$30 # Sigma1(e) + + srl $30,$31,2 + addu $13,$16 + lw $16,68($6) # K[17] + sll $15,$31,10 + addu $13,$14 + srl $14,$31,13 + xor $30,$15 + sll $15,$31,19 + xor $30,$14 + srl $14,$31,22 + xor $30,$15 + sll $15,$31,30 + xor $30,$14 + sw $9,4($29) # offload to ring buffer + xor $30,$15 # Sigma0(a) + + or $14,$31,$1 + and $15,$31,$1 + and $14,$2 + or $15,$14 # Maj(a,b,c) + addu $13,$16 # +=K[17] + addu $30,$15 + + addu $3,$13 + addu $30,$13 + lw $12,16($29) # prefetch from ring buffer + srl $16,$11,3 # Xupdate(18) + addu $10,$19 # +=X[i+9] + sll $15,$11,14 + srl $14,$11,7 + xor $16,$15 + sll $15,11 + xor $16,$14 + srl $14,$11,18 + xor $16,$15 + + srl $17,$8,10 + xor $16,$14 # sigma0(X[i+1]) + sll $15,$8,13 + addu $10,$16 + srl $14,$8,17 + xor $17,$15 + sll $15,2 + xor $17,$14 + srl $14,$8,19 + xor $17,$15 + + xor $17,$14 # sigma1(X[i+14]) + addu $10,$17 + addu $14,$10,$25 # 18 + srl $25,$3,6 + xor $17,$7,$24 + sll $16,$3,7 + and $17,$3 + srl $15,$3,11 + xor $25,$16 + sll $16,$3,21 + xor $25,$15 + srl $15,$3,25 + xor $25,$16 + sll $16,$3,26 + xor $25,$15 + xor $17,$24 # Ch(e,f,g) + xor $15,$16,$25 # Sigma1(e) + + srl $25,$30,2 + addu $14,$17 + lw $17,72($6) # K[18] + sll $16,$30,10 + addu $14,$15 + srl $15,$30,13 + xor $25,$16 + sll $16,$30,19 + xor $25,$15 + srl $15,$30,22 + xor $25,$16 + sll $16,$30,30 + xor $25,$15 + sw $10,8($29) # offload to ring buffer + xor $25,$16 # Sigma0(a) + + or $15,$30,$31 + and $16,$30,$31 + and $15,$1 + or $16,$15 # Maj(a,b,c) + addu $14,$17 # +=K[18] + addu $25,$16 + + addu $2,$14 + addu $25,$14 + lw $13,20($29) # prefetch from ring buffer + srl $17,$12,3 # Xupdate(19) + addu $11,$20 # +=X[i+9] + sll $16,$12,14 + srl $15,$12,7 + xor $17,$16 + sll $16,11 + xor $17,$15 + srl $15,$12,18 + xor $17,$16 + + srl $18,$9,10 + xor $17,$15 # sigma0(X[i+1]) + sll $16,$9,13 + addu $11,$17 + srl $15,$9,17 + xor $18,$16 + sll $16,2 + xor $18,$15 + srl $15,$9,19 + xor $18,$16 + + xor $18,$15 # sigma1(X[i+14]) + addu $11,$18 + addu $15,$11,$24 # 19 + srl $24,$2,6 + xor $18,$3,$7 + sll $17,$2,7 + and $18,$2 + srl $16,$2,11 + xor $24,$17 + sll $17,$2,21 + xor $24,$16 + srl $16,$2,25 + xor $24,$17 + sll $17,$2,26 + xor $24,$16 + xor $18,$7 # Ch(e,f,g) + xor $16,$17,$24 # Sigma1(e) + + srl $24,$25,2 + addu $15,$18 + lw $18,76($6) # K[19] + sll $17,$25,10 + addu $15,$16 + srl $16,$25,13 + xor $24,$17 + sll $17,$25,19 + xor $24,$16 + srl $16,$25,22 + xor $24,$17 + sll $17,$25,30 + xor $24,$16 + sw $11,12($29) # offload to ring buffer + xor $24,$17 # Sigma0(a) + + or $16,$25,$30 + and $17,$25,$30 + and $16,$31 + or $17,$16 # Maj(a,b,c) + addu $15,$18 # +=K[19] + addu $24,$17 + + addu $1,$15 + addu $24,$15 + lw $14,24($29) # prefetch from ring buffer + srl $18,$13,3 # Xupdate(20) + addu $12,$21 # +=X[i+9] + sll $17,$13,14 + srl $16,$13,7 + xor $18,$17 + sll $17,11 + xor $18,$16 + srl $16,$13,18 + xor $18,$17 + + srl $19,$10,10 + xor $18,$16 # sigma0(X[i+1]) + sll $17,$10,13 + addu $12,$18 + srl $16,$10,17 + xor $19,$17 + sll $17,2 + xor $19,$16 + srl $16,$10,19 + xor $19,$17 + + xor $19,$16 # sigma1(X[i+14]) + addu $12,$19 + addu $16,$12,$7 # 20 + srl $7,$1,6 + xor $19,$2,$3 + sll $18,$1,7 + and $19,$1 + srl $17,$1,11 + xor $7,$18 + sll $18,$1,21 + xor $7,$17 + srl $17,$1,25 + xor $7,$18 + sll $18,$1,26 + xor $7,$17 + xor $19,$3 # Ch(e,f,g) + xor $17,$18,$7 # Sigma1(e) + + srl $7,$24,2 + addu $16,$19 + lw $19,80($6) # K[20] + sll $18,$24,10 + addu $16,$17 + srl $17,$24,13 + xor $7,$18 + sll $18,$24,19 + xor $7,$17 + srl $17,$24,22 + xor $7,$18 + sll $18,$24,30 + xor $7,$17 + sw $12,16($29) # offload to ring buffer + xor $7,$18 # Sigma0(a) + + or $17,$24,$25 + and $18,$24,$25 + and $17,$30 + or $18,$17 # Maj(a,b,c) + addu $16,$19 # +=K[20] + addu $7,$18 + + addu $31,$16 + addu $7,$16 + lw $15,28($29) # prefetch from ring buffer + srl $19,$14,3 # Xupdate(21) + addu $13,$22 # +=X[i+9] + sll $18,$14,14 + srl $17,$14,7 + xor $19,$18 + sll $18,11 + xor $19,$17 + srl $17,$14,18 + xor $19,$18 + + srl $20,$11,10 + xor $19,$17 # sigma0(X[i+1]) + sll $18,$11,13 + addu $13,$19 + srl $17,$11,17 + xor $20,$18 + sll $18,2 + xor $20,$17 + srl $17,$11,19 + xor $20,$18 + + xor $20,$17 # sigma1(X[i+14]) + addu $13,$20 + addu $17,$13,$3 # 21 + srl $3,$31,6 + xor $20,$1,$2 + sll $19,$31,7 + and $20,$31 + srl $18,$31,11 + xor $3,$19 + sll $19,$31,21 + xor $3,$18 + srl $18,$31,25 + xor $3,$19 + sll $19,$31,26 + xor $3,$18 + xor $20,$2 # Ch(e,f,g) + xor $18,$19,$3 # Sigma1(e) + + srl $3,$7,2 + addu $17,$20 + lw $20,84($6) # K[21] + sll $19,$7,10 + addu $17,$18 + srl $18,$7,13 + xor $3,$19 + sll $19,$7,19 + xor $3,$18 + srl $18,$7,22 + xor $3,$19 + sll $19,$7,30 + xor $3,$18 + sw $13,20($29) # offload to ring buffer + xor $3,$19 # Sigma0(a) + + or $18,$7,$24 + and $19,$7,$24 + and $18,$25 + or $19,$18 # Maj(a,b,c) + addu $17,$20 # +=K[21] + addu $3,$19 + + addu $30,$17 + addu $3,$17 + lw $16,32($29) # prefetch from ring buffer + srl $20,$15,3 # Xupdate(22) + addu $14,$23 # +=X[i+9] + sll $19,$15,14 + srl $18,$15,7 + xor $20,$19 + sll $19,11 + xor $20,$18 + srl $18,$15,18 + xor $20,$19 + + srl $21,$12,10 + xor $20,$18 # sigma0(X[i+1]) + sll $19,$12,13 + addu $14,$20 + srl $18,$12,17 + xor $21,$19 + sll $19,2 + xor $21,$18 + srl $18,$12,19 + xor $21,$19 + + xor $21,$18 # sigma1(X[i+14]) + addu $14,$21 + addu $18,$14,$2 # 22 + srl $2,$30,6 + xor $21,$31,$1 + sll $20,$30,7 + and $21,$30 + srl $19,$30,11 + xor $2,$20 + sll $20,$30,21 + xor $2,$19 + srl $19,$30,25 + xor $2,$20 + sll $20,$30,26 + xor $2,$19 + xor $21,$1 # Ch(e,f,g) + xor $19,$20,$2 # Sigma1(e) + + srl $2,$3,2 + addu $18,$21 + lw $21,88($6) # K[22] + sll $20,$3,10 + addu $18,$19 + srl $19,$3,13 + xor $2,$20 + sll $20,$3,19 + xor $2,$19 + srl $19,$3,22 + xor $2,$20 + sll $20,$3,30 + xor $2,$19 + sw $14,24($29) # offload to ring buffer + xor $2,$20 # Sigma0(a) + + or $19,$3,$7 + and $20,$3,$7 + and $19,$24 + or $20,$19 # Maj(a,b,c) + addu $18,$21 # +=K[22] + addu $2,$20 + + addu $25,$18 + addu $2,$18 + lw $17,36($29) # prefetch from ring buffer + srl $21,$16,3 # Xupdate(23) + addu $15,$8 # +=X[i+9] + sll $20,$16,14 + srl $19,$16,7 + xor $21,$20 + sll $20,11 + xor $21,$19 + srl $19,$16,18 + xor $21,$20 + + srl $22,$13,10 + xor $21,$19 # sigma0(X[i+1]) + sll $20,$13,13 + addu $15,$21 + srl $19,$13,17 + xor $22,$20 + sll $20,2 + xor $22,$19 + srl $19,$13,19 + xor $22,$20 + + xor $22,$19 # sigma1(X[i+14]) + addu $15,$22 + addu $19,$15,$1 # 23 + srl $1,$25,6 + xor $22,$30,$31 + sll $21,$25,7 + and $22,$25 + srl $20,$25,11 + xor $1,$21 + sll $21,$25,21 + xor $1,$20 + srl $20,$25,25 + xor $1,$21 + sll $21,$25,26 + xor $1,$20 + xor $22,$31 # Ch(e,f,g) + xor $20,$21,$1 # Sigma1(e) + + srl $1,$2,2 + addu $19,$22 + lw $22,92($6) # K[23] + sll $21,$2,10 + addu $19,$20 + srl $20,$2,13 + xor $1,$21 + sll $21,$2,19 + xor $1,$20 + srl $20,$2,22 + xor $1,$21 + sll $21,$2,30 + xor $1,$20 + sw $15,28($29) # offload to ring buffer + xor $1,$21 # Sigma0(a) + + or $20,$2,$3 + and $21,$2,$3 + and $20,$7 + or $21,$20 # Maj(a,b,c) + addu $19,$22 # +=K[23] + addu $1,$21 + + addu $24,$19 + addu $1,$19 + lw $18,40($29) # prefetch from ring buffer + srl $22,$17,3 # Xupdate(24) + addu $16,$9 # +=X[i+9] + sll $21,$17,14 + srl $20,$17,7 + xor $22,$21 + sll $21,11 + xor $22,$20 + srl $20,$17,18 + xor $22,$21 + + srl $23,$14,10 + xor $22,$20 # sigma0(X[i+1]) + sll $21,$14,13 + addu $16,$22 + srl $20,$14,17 + xor $23,$21 + sll $21,2 + xor $23,$20 + srl $20,$14,19 + xor $23,$21 + + xor $23,$20 # sigma1(X[i+14]) + addu $16,$23 + addu $20,$16,$31 # 24 + srl $31,$24,6 + xor $23,$25,$30 + sll $22,$24,7 + and $23,$24 + srl $21,$24,11 + xor $31,$22 + sll $22,$24,21 + xor $31,$21 + srl $21,$24,25 + xor $31,$22 + sll $22,$24,26 + xor $31,$21 + xor $23,$30 # Ch(e,f,g) + xor $21,$22,$31 # Sigma1(e) + + srl $31,$1,2 + addu $20,$23 + lw $23,96($6) # K[24] + sll $22,$1,10 + addu $20,$21 + srl $21,$1,13 + xor $31,$22 + sll $22,$1,19 + xor $31,$21 + srl $21,$1,22 + xor $31,$22 + sll $22,$1,30 + xor $31,$21 + sw $16,32($29) # offload to ring buffer + xor $31,$22 # Sigma0(a) + + or $21,$1,$2 + and $22,$1,$2 + and $21,$3 + or $22,$21 # Maj(a,b,c) + addu $20,$23 # +=K[24] + addu $31,$22 + + addu $7,$20 + addu $31,$20 + lw $19,44($29) # prefetch from ring buffer + srl $23,$18,3 # Xupdate(25) + addu $17,$10 # +=X[i+9] + sll $22,$18,14 + srl $21,$18,7 + xor $23,$22 + sll $22,11 + xor $23,$21 + srl $21,$18,18 + xor $23,$22 + + srl $8,$15,10 + xor $23,$21 # sigma0(X[i+1]) + sll $22,$15,13 + addu $17,$23 + srl $21,$15,17 + xor $8,$22 + sll $22,2 + xor $8,$21 + srl $21,$15,19 + xor $8,$22 + + xor $8,$21 # sigma1(X[i+14]) + addu $17,$8 + addu $21,$17,$30 # 25 + srl $30,$7,6 + xor $8,$24,$25 + sll $23,$7,7 + and $8,$7 + srl $22,$7,11 + xor $30,$23 + sll $23,$7,21 + xor $30,$22 + srl $22,$7,25 + xor $30,$23 + sll $23,$7,26 + xor $30,$22 + xor $8,$25 # Ch(e,f,g) + xor $22,$23,$30 # Sigma1(e) + + srl $30,$31,2 + addu $21,$8 + lw $8,100($6) # K[25] + sll $23,$31,10 + addu $21,$22 + srl $22,$31,13 + xor $30,$23 + sll $23,$31,19 + xor $30,$22 + srl $22,$31,22 + xor $30,$23 + sll $23,$31,30 + xor $30,$22 + sw $17,36($29) # offload to ring buffer + xor $30,$23 # Sigma0(a) + + or $22,$31,$1 + and $23,$31,$1 + and $22,$2 + or $23,$22 # Maj(a,b,c) + addu $21,$8 # +=K[25] + addu $30,$23 + + addu $3,$21 + addu $30,$21 + lw $20,48($29) # prefetch from ring buffer + srl $8,$19,3 # Xupdate(26) + addu $18,$11 # +=X[i+9] + sll $23,$19,14 + srl $22,$19,7 + xor $8,$23 + sll $23,11 + xor $8,$22 + srl $22,$19,18 + xor $8,$23 + + srl $9,$16,10 + xor $8,$22 # sigma0(X[i+1]) + sll $23,$16,13 + addu $18,$8 + srl $22,$16,17 + xor $9,$23 + sll $23,2 + xor $9,$22 + srl $22,$16,19 + xor $9,$23 + + xor $9,$22 # sigma1(X[i+14]) + addu $18,$9 + addu $22,$18,$25 # 26 + srl $25,$3,6 + xor $9,$7,$24 + sll $8,$3,7 + and $9,$3 + srl $23,$3,11 + xor $25,$8 + sll $8,$3,21 + xor $25,$23 + srl $23,$3,25 + xor $25,$8 + sll $8,$3,26 + xor $25,$23 + xor $9,$24 # Ch(e,f,g) + xor $23,$8,$25 # Sigma1(e) + + srl $25,$30,2 + addu $22,$9 + lw $9,104($6) # K[26] + sll $8,$30,10 + addu $22,$23 + srl $23,$30,13 + xor $25,$8 + sll $8,$30,19 + xor $25,$23 + srl $23,$30,22 + xor $25,$8 + sll $8,$30,30 + xor $25,$23 + sw $18,40($29) # offload to ring buffer + xor $25,$8 # Sigma0(a) + + or $23,$30,$31 + and $8,$30,$31 + and $23,$1 + or $8,$23 # Maj(a,b,c) + addu $22,$9 # +=K[26] + addu $25,$8 + + addu $2,$22 + addu $25,$22 + lw $21,52($29) # prefetch from ring buffer + srl $9,$20,3 # Xupdate(27) + addu $19,$12 # +=X[i+9] + sll $8,$20,14 + srl $23,$20,7 + xor $9,$8 + sll $8,11 + xor $9,$23 + srl $23,$20,18 + xor $9,$8 + + srl $10,$17,10 + xor $9,$23 # sigma0(X[i+1]) + sll $8,$17,13 + addu $19,$9 + srl $23,$17,17 + xor $10,$8 + sll $8,2 + xor $10,$23 + srl $23,$17,19 + xor $10,$8 + + xor $10,$23 # sigma1(X[i+14]) + addu $19,$10 + addu $23,$19,$24 # 27 + srl $24,$2,6 + xor $10,$3,$7 + sll $9,$2,7 + and $10,$2 + srl $8,$2,11 + xor $24,$9 + sll $9,$2,21 + xor $24,$8 + srl $8,$2,25 + xor $24,$9 + sll $9,$2,26 + xor $24,$8 + xor $10,$7 # Ch(e,f,g) + xor $8,$9,$24 # Sigma1(e) + + srl $24,$25,2 + addu $23,$10 + lw $10,108($6) # K[27] + sll $9,$25,10 + addu $23,$8 + srl $8,$25,13 + xor $24,$9 + sll $9,$25,19 + xor $24,$8 + srl $8,$25,22 + xor $24,$9 + sll $9,$25,30 + xor $24,$8 + sw $19,44($29) # offload to ring buffer + xor $24,$9 # Sigma0(a) + + or $8,$25,$30 + and $9,$25,$30 + and $8,$31 + or $9,$8 # Maj(a,b,c) + addu $23,$10 # +=K[27] + addu $24,$9 + + addu $1,$23 + addu $24,$23 + lw $22,56($29) # prefetch from ring buffer + srl $10,$21,3 # Xupdate(28) + addu $20,$13 # +=X[i+9] + sll $9,$21,14 + srl $8,$21,7 + xor $10,$9 + sll $9,11 + xor $10,$8 + srl $8,$21,18 + xor $10,$9 + + srl $11,$18,10 + xor $10,$8 # sigma0(X[i+1]) + sll $9,$18,13 + addu $20,$10 + srl $8,$18,17 + xor $11,$9 + sll $9,2 + xor $11,$8 + srl $8,$18,19 + xor $11,$9 + + xor $11,$8 # sigma1(X[i+14]) + addu $20,$11 + addu $8,$20,$7 # 28 + srl $7,$1,6 + xor $11,$2,$3 + sll $10,$1,7 + and $11,$1 + srl $9,$1,11 + xor $7,$10 + sll $10,$1,21 + xor $7,$9 + srl $9,$1,25 + xor $7,$10 + sll $10,$1,26 + xor $7,$9 + xor $11,$3 # Ch(e,f,g) + xor $9,$10,$7 # Sigma1(e) + + srl $7,$24,2 + addu $8,$11 + lw $11,112($6) # K[28] + sll $10,$24,10 + addu $8,$9 + srl $9,$24,13 + xor $7,$10 + sll $10,$24,19 + xor $7,$9 + srl $9,$24,22 + xor $7,$10 + sll $10,$24,30 + xor $7,$9 + sw $20,48($29) # offload to ring buffer + xor $7,$10 # Sigma0(a) + + or $9,$24,$25 + and $10,$24,$25 + and $9,$30 + or $10,$9 # Maj(a,b,c) + addu $8,$11 # +=K[28] + addu $7,$10 + + addu $31,$8 + addu $7,$8 + lw $23,60($29) # prefetch from ring buffer + srl $11,$22,3 # Xupdate(29) + addu $21,$14 # +=X[i+9] + sll $10,$22,14 + srl $9,$22,7 + xor $11,$10 + sll $10,11 + xor $11,$9 + srl $9,$22,18 + xor $11,$10 + + srl $12,$19,10 + xor $11,$9 # sigma0(X[i+1]) + sll $10,$19,13 + addu $21,$11 + srl $9,$19,17 + xor $12,$10 + sll $10,2 + xor $12,$9 + srl $9,$19,19 + xor $12,$10 + + xor $12,$9 # sigma1(X[i+14]) + addu $21,$12 + addu $9,$21,$3 # 29 + srl $3,$31,6 + xor $12,$1,$2 + sll $11,$31,7 + and $12,$31 + srl $10,$31,11 + xor $3,$11 + sll $11,$31,21 + xor $3,$10 + srl $10,$31,25 + xor $3,$11 + sll $11,$31,26 + xor $3,$10 + xor $12,$2 # Ch(e,f,g) + xor $10,$11,$3 # Sigma1(e) + + srl $3,$7,2 + addu $9,$12 + lw $12,116($6) # K[29] + sll $11,$7,10 + addu $9,$10 + srl $10,$7,13 + xor $3,$11 + sll $11,$7,19 + xor $3,$10 + srl $10,$7,22 + xor $3,$11 + sll $11,$7,30 + xor $3,$10 + sw $21,52($29) # offload to ring buffer + xor $3,$11 # Sigma0(a) + + or $10,$7,$24 + and $11,$7,$24 + and $10,$25 + or $11,$10 # Maj(a,b,c) + addu $9,$12 # +=K[29] + addu $3,$11 + + addu $30,$9 + addu $3,$9 + lw $8,0($29) # prefetch from ring buffer + srl $12,$23,3 # Xupdate(30) + addu $22,$15 # +=X[i+9] + sll $11,$23,14 + srl $10,$23,7 + xor $12,$11 + sll $11,11 + xor $12,$10 + srl $10,$23,18 + xor $12,$11 + + srl $13,$20,10 + xor $12,$10 # sigma0(X[i+1]) + sll $11,$20,13 + addu $22,$12 + srl $10,$20,17 + xor $13,$11 + sll $11,2 + xor $13,$10 + srl $10,$20,19 + xor $13,$11 + + xor $13,$10 # sigma1(X[i+14]) + addu $22,$13 + addu $10,$22,$2 # 30 + srl $2,$30,6 + xor $13,$31,$1 + sll $12,$30,7 + and $13,$30 + srl $11,$30,11 + xor $2,$12 + sll $12,$30,21 + xor $2,$11 + srl $11,$30,25 + xor $2,$12 + sll $12,$30,26 + xor $2,$11 + xor $13,$1 # Ch(e,f,g) + xor $11,$12,$2 # Sigma1(e) + + srl $2,$3,2 + addu $10,$13 + lw $13,120($6) # K[30] + sll $12,$3,10 + addu $10,$11 + srl $11,$3,13 + xor $2,$12 + sll $12,$3,19 + xor $2,$11 + srl $11,$3,22 + xor $2,$12 + sll $12,$3,30 + xor $2,$11 + sw $22,56($29) # offload to ring buffer + xor $2,$12 # Sigma0(a) + + or $11,$3,$7 + and $12,$3,$7 + and $11,$24 + or $12,$11 # Maj(a,b,c) + addu $10,$13 # +=K[30] + addu $2,$12 + + addu $25,$10 + addu $2,$10 + lw $9,4($29) # prefetch from ring buffer + srl $13,$8,3 # Xupdate(31) + addu $23,$16 # +=X[i+9] + sll $12,$8,14 + srl $11,$8,7 + xor $13,$12 + sll $12,11 + xor $13,$11 + srl $11,$8,18 + xor $13,$12 + + srl $14,$21,10 + xor $13,$11 # sigma0(X[i+1]) + sll $12,$21,13 + addu $23,$13 + srl $11,$21,17 + xor $14,$12 + sll $12,2 + xor $14,$11 + srl $11,$21,19 + xor $14,$12 + + xor $14,$11 # sigma1(X[i+14]) + addu $23,$14 + addu $11,$23,$1 # 31 + srl $1,$25,6 + xor $14,$30,$31 + sll $13,$25,7 + and $14,$25 + srl $12,$25,11 + xor $1,$13 + sll $13,$25,21 + xor $1,$12 + srl $12,$25,25 + xor $1,$13 + sll $13,$25,26 + xor $1,$12 + xor $14,$31 # Ch(e,f,g) + xor $12,$13,$1 # Sigma1(e) + + srl $1,$2,2 + addu $11,$14 + lw $14,124($6) # K[31] + sll $13,$2,10 + addu $11,$12 + srl $12,$2,13 + xor $1,$13 + sll $13,$2,19 + xor $1,$12 + srl $12,$2,22 + xor $1,$13 + sll $13,$2,30 + xor $1,$12 + sw $23,60($29) # offload to ring buffer + xor $1,$13 # Sigma0(a) + + or $12,$2,$3 + and $13,$2,$3 + and $12,$7 + or $13,$12 # Maj(a,b,c) + addu $11,$14 # +=K[31] + addu $1,$13 + + addu $24,$11 + addu $1,$11 + lw $10,8($29) # prefetch from ring buffer + and $14,0xfff + li $15,2290 + .set noreorder + bne $14,$15,.L16_xx + add $6,16*4 # Ktbl+=16 + + lw $23,16*4($29) # restore pointer to the end of input + lw $8,0*4($4) + lw $9,1*4($4) + lw $10,2*4($4) + add $5,16*4 + lw $11,3*4($4) + addu $1,$8 + lw $12,4*4($4) + addu $2,$9 + lw $13,5*4($4) + addu $3,$10 + lw $14,6*4($4) + addu $7,$11 + lw $15,7*4($4) + addu $24,$12 + sw $1,0*4($4) + addu $25,$13 + sw $2,1*4($4) + addu $30,$14 + sw $3,2*4($4) + addu $31,$15 + sw $7,3*4($4) + sw $24,4*4($4) + sw $25,5*4($4) + sw $30,6*4($4) + sw $31,7*4($4) + + bne $5,$23,.Loop + sub $6,192 # rewind $6 + + lw $31,128-1*4($29) + lw $30,128-2*4($29) + lw $23,128-3*4($29) + lw $22,128-4*4($29) + lw $21,128-5*4($29) + lw $20,128-6*4($29) + lw $19,128-7*4($29) + lw $18,128-8*4($29) + lw $17,128-9*4($29) + lw $16,128-10*4($29) + jr $31 + add $29,128 +.end sha256_block_data_order + +.rdata +.align 5 +K256: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +.asciiz "SHA256 for MIPS, CRYPTOGAMS by " +.align 5 + diff --git a/app/openssl/crypto/sha/asm/sha256-x86_64.S b/app/openssl/crypto/sha/asm/sha256-x86_64.S new file mode 100644 index 00000000..db5b898f --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha256-x86_64.S @@ -0,0 +1,1778 @@ +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/app/openssl/crypto/sha/asm/sha512-586.S b/app/openssl/crypto/sha/asm/sha512-586.S new file mode 100644 index 00000000..82c76c41 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-586.S @@ -0,0 +1,836 @@ +.file "sha512-586.s" +.text +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal _GLOBAL_OFFSET_TABLE_+[.-.L001K512](%ebp),%edx + movl OPENSSL_ia32cap_P@GOT(%edx),%edx + btl $26,(%edx) + jnc .L002loop_x86 + movq (%esi),%mm0 + movq 8(%esi),%mm1 + movq 16(%esi),%mm2 + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + subl $80,%esp +.align 16 +.L003loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + movq %mm7,56(%esp) + movl (%edi),%ecx + movl 4(%edi),%edx + addl $8,%edi + bswap %ecx + bswap %edx + movl %ecx,76(%esp) + movl %edx,72(%esp) +.align 16 +.L00400_14_sse2: + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + movl %eax,68(%esp) + movl %ebx,64(%esp) + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + movq %mm4,%mm1 + movq %mm4,%mm2 + psrlq $14,%mm1 + movq %mm4,32(%esp) + psllq $23,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm2,%mm3 + psllq $23,%mm2 + pxor %mm1,%mm3 + psrlq $23,%mm1 + pxor %mm2,%mm3 + psllq $4,%mm2 + pxor %mm1,%mm3 + paddq (%ebp),%mm7 + pxor %mm2,%mm3 + pxor %mm6,%mm5 + movq 8(%esp),%mm1 + pand %mm4,%mm5 + movq 16(%esp),%mm2 + pxor %mm6,%mm5 + movq 24(%esp),%mm4 + paddq %mm5,%mm3 + movq %mm0,(%esp) + paddq %mm7,%mm3 + movq %mm0,%mm5 + movq %mm0,%mm6 + paddq 72(%esp),%mm3 + psrlq $28,%mm5 + paddq %mm3,%mm4 + psllq $25,%mm6 + movq %mm5,%mm7 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + psrlq $5,%mm5 + pxor %mm6,%mm7 + psllq $6,%mm6 + pxor %mm5,%mm7 + subl $8,%esp + pxor %mm6,%mm7 + movq %mm0,%mm5 + por %mm2,%mm0 + pand %mm2,%mm5 + pand %mm1,%mm0 + por %mm0,%mm5 + paddq %mm5,%mm7 + movq %mm3,%mm0 + movb (%ebp),%dl + paddq %mm7,%mm0 + addl $8,%ebp + cmpb $53,%dl + jne .L00400_14_sse2 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + movq %mm4,%mm1 + movq %mm4,%mm2 + psrlq $14,%mm1 + movq %mm4,32(%esp) + psllq $23,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm2,%mm3 + psllq $23,%mm2 + pxor %mm1,%mm3 + psrlq $23,%mm1 + pxor %mm2,%mm3 + psllq $4,%mm2 + pxor %mm1,%mm3 + paddq (%ebp),%mm7 + pxor %mm2,%mm3 + pxor %mm6,%mm5 + movq 8(%esp),%mm1 + pand %mm4,%mm5 + movq 16(%esp),%mm2 + pxor %mm6,%mm5 + movq 24(%esp),%mm4 + paddq %mm5,%mm3 + movq %mm0,(%esp) + paddq %mm7,%mm3 + movq %mm0,%mm5 + movq %mm0,%mm6 + paddq 72(%esp),%mm3 + psrlq $28,%mm5 + paddq %mm3,%mm4 + psllq $25,%mm6 + movq %mm5,%mm7 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + psrlq $5,%mm5 + pxor %mm6,%mm7 + psllq $6,%mm6 + pxor %mm5,%mm7 + subl $8,%esp + pxor %mm6,%mm7 + movq %mm0,%mm5 + por %mm2,%mm0 + movq 88(%esp),%mm6 + pand %mm2,%mm5 + pand %mm1,%mm0 + movq 192(%esp),%mm2 + por %mm0,%mm5 + paddq %mm5,%mm7 + movq %mm3,%mm0 + movb (%ebp),%dl + paddq %mm7,%mm0 + addl $8,%ebp +.align 16 +.L00516_79_sse2: + movq %mm2,%mm1 + psrlq $1,%mm2 + movq %mm6,%mm7 + psrlq $6,%mm6 + movq %mm2,%mm3 + psrlq $6,%mm2 + movq %mm6,%mm5 + psrlq $13,%mm6 + pxor %mm2,%mm3 + psrlq $1,%mm2 + pxor %mm6,%mm5 + psrlq $42,%mm6 + pxor %mm2,%mm3 + movq 200(%esp),%mm2 + psllq $56,%mm1 + pxor %mm6,%mm5 + psllq $3,%mm7 + pxor %mm1,%mm3 + paddq 128(%esp),%mm2 + psllq $7,%mm1 + pxor %mm7,%mm5 + psllq $42,%mm7 + pxor %mm1,%mm3 + pxor %mm7,%mm5 + paddq %mm5,%mm3 + paddq %mm2,%mm3 + movq %mm3,72(%esp) + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + movq %mm4,%mm1 + movq %mm4,%mm2 + psrlq $14,%mm1 + movq %mm4,32(%esp) + psllq $23,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm2,%mm3 + psllq $23,%mm2 + pxor %mm1,%mm3 + psrlq $23,%mm1 + pxor %mm2,%mm3 + psllq $4,%mm2 + pxor %mm1,%mm3 + paddq (%ebp),%mm7 + pxor %mm2,%mm3 + pxor %mm6,%mm5 + movq 8(%esp),%mm1 + pand %mm4,%mm5 + movq 16(%esp),%mm2 + pxor %mm6,%mm5 + movq 24(%esp),%mm4 + paddq %mm5,%mm3 + movq %mm0,(%esp) + paddq %mm7,%mm3 + movq %mm0,%mm5 + movq %mm0,%mm6 + paddq 72(%esp),%mm3 + psrlq $28,%mm5 + paddq %mm3,%mm4 + psllq $25,%mm6 + movq %mm5,%mm7 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + psrlq $5,%mm5 + pxor %mm6,%mm7 + psllq $6,%mm6 + pxor %mm5,%mm7 + subl $8,%esp + pxor %mm6,%mm7 + movq %mm0,%mm5 + por %mm2,%mm0 + movq 88(%esp),%mm6 + pand %mm2,%mm5 + pand %mm1,%mm0 + movq 192(%esp),%mm2 + por %mm0,%mm5 + paddq %mm5,%mm7 + movq %mm3,%mm0 + movb (%ebp),%dl + paddq %mm7,%mm0 + addl $8,%ebp + cmpb $23,%dl + jne .L00516_79_sse2 + movq 8(%esp),%mm1 + movq 16(%esp),%mm2 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + addl $640,%esp + subl $640,%ebp + cmpl 88(%esp),%edi + jb .L003loop_sse2 + emms + movl 92(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00600_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00600_15_x86 +.align 16 +.L00716_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L00716_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.size sha512_block_data_order,.-.L_sha512_block_data_order_begin +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/app/openssl/crypto/sha/asm/sha512-586.pl b/app/openssl/crypto/sha/asm/sha512-586.pl index 5b9f3337..9f8c51eb 100644 --- a/app/openssl/crypto/sha/asm/sha512-586.pl +++ b/app/openssl/crypto/sha/asm/sha512-586.pl @@ -23,7 +23,7 @@ # # IALU code-path is optimized for elder Pentiums. On vanilla Pentium # performance improvement over compiler generated code reaches ~60%, -# while on PIII - ~35%. On newer µ-archs improvement varies from 15% +# while on PIII - ~35%. On newer µ-archs improvement varies from 15% # to 50%, but it's less important as they are expected to execute SSE2 # code-path, which is commonly ~2-3x faster [than compiler generated # code]. SSE2 code-path is as fast as original sha512-sse2.pl, even @@ -142,9 +142,9 @@ sub BODY_00_15_x86 { &mov ("edx",$Ehi); &mov ("esi","ecx"); - &shr ("ecx",9) # lo>>9 + &shr ("ecx",9); # lo>>9 &mov ("edi","edx"); - &shr ("edx",9) # hi>>9 + &shr ("edx",9); # hi>>9 &mov ("ebx","ecx"); &shl ("esi",14); # lo<<14 &mov ("eax","edx"); @@ -207,9 +207,9 @@ sub BODY_00_15_x86 { &mov ($Dhi,"ebx"); &mov ("esi","ecx"); - &shr ("ecx",2) # lo>>2 + &shr ("ecx",2); # lo>>2 &mov ("edi","edx"); - &shr ("edx",2) # hi>>2 + &shr ("edx",2); # hi>>2 &mov ("ebx","ecx"); &shl ("esi",4); # lo<<4 &mov ("eax","edx"); @@ -452,9 +452,9 @@ if ($sse2) { &mov ("edx",&DWP(8*(9+15+16-1)+4,"esp")); &mov ("esi","ecx"); - &shr ("ecx",1) # lo>>1 + &shr ("ecx",1); # lo>>1 &mov ("edi","edx"); - &shr ("edx",1) # hi>>1 + &shr ("edx",1); # hi>>1 &mov ("eax","ecx"); &shl ("esi",24); # lo<<24 &mov ("ebx","edx"); @@ -488,9 +488,9 @@ if ($sse2) { &mov ("edx",&DWP(8*(9+15+16-14)+4,"esp")); &mov ("esi","ecx"); - &shr ("ecx",6) # lo>>6 + &shr ("ecx",6); # lo>>6 &mov ("edi","edx"); - &shr ("edx",6) # hi>>6 + &shr ("edx",6); # hi>>6 &mov ("eax","ecx"); &shl ("esi",3); # lo<<3 &mov ("ebx","edx"); diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.S b/app/openssl/crypto/sha/asm/sha512-armv4.S new file mode 120000 index 00000000..046c909a --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-armv4.S @@ -0,0 +1 @@ +sha512-armv4.s \ No newline at end of file diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.pl b/app/openssl/crypto/sha/asm/sha512-armv4.pl index 3a35861a..7faf37b1 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/app/openssl/crypto/sha/asm/sha512-armv4.pl @@ -18,22 +18,33 @@ # Rescheduling for dual-issue pipeline resulted in 6% improvement on # Cortex A8 core and ~40 cycles per processed byte. +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 7% +# improvement on Coxtex A8 core and ~38 cycles per byte. + +# March 2011. +# +# Add NEON implementation. On Cortex A8 it was measured to process +# one byte in 25.5 cycles or 47% faster than integer-only code. + # Byte order [in]dependence. ========================================= # -# Caller is expected to maintain specific *dword* order in h[0-7], -# namely with most significant dword at *lower* address, which is -# reflected in below two parameters. *Byte* order within these dwords -# in turn is whatever *native* byte order on current platform. -$hi=0; -$lo=4; +# Originally caller was expected to maintain specific *dword* order in +# h[0-7], namely with most significant dword at *lower* address, which +# was reflected in below two parameters as 0 and 4. Now caller is +# expected to maintain native byte order for whole 64-bit values. +$hi="HI"; +$lo="LO"; # ==================================================================== while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; -$ctx="r0"; +$ctx="r0"; # parameter block $inp="r1"; $len="r2"; + $Tlo="r3"; $Thi="r4"; $Alo="r5"; @@ -61,15 +72,17 @@ $Xoff=8*8; sub BODY_00_15() { my $magic = shift; $code.=<<___; - ldr $t2,[sp,#$Hoff+0] @ h.lo - ldr $t3,[sp,#$Hoff+4] @ h.hi @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 mov $t0,$Elo,lsr#14 + str $Tlo,[sp,#$Xoff+0] mov $t1,$Ehi,lsr#14 + str $Thi,[sp,#$Xoff+4] eor $t0,$t0,$Ehi,lsl#18 + ldr $t2,[sp,#$Hoff+0] @ h.lo eor $t1,$t1,$Elo,lsl#18 + ldr $t3,[sp,#$Hoff+4] @ h.hi eor $t0,$t0,$Elo,lsr#18 eor $t1,$t1,$Ehi,lsr#18 eor $t0,$t0,$Ehi,lsl#14 @@ -96,25 +109,24 @@ $code.=<<___; and $t1,$t1,$Ehi str $Ahi,[sp,#$Aoff+4] eor $t0,$t0,$t2 - ldr $t2,[$Ktbl,#4] @ K[i].lo + ldr $t2,[$Ktbl,#$lo] @ K[i].lo eor $t1,$t1,$t3 @ Ch(e,f,g) - ldr $t3,[$Ktbl,#0] @ K[i].hi + ldr $t3,[$Ktbl,#$hi] @ K[i].hi adds $Tlo,$Tlo,$t0 ldr $Elo,[sp,#$Doff+0] @ d.lo adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) ldr $Ehi,[sp,#$Doff+4] @ d.hi adds $Tlo,$Tlo,$t2 + and $t0,$t2,#0xff adc $Thi,$Thi,$t3 @ T += K[i] adds $Elo,$Elo,$Tlo + ldr $t2,[sp,#$Boff+0] @ b.lo adc $Ehi,$Ehi,$Thi @ d += T - - and $t0,$t2,#0xff teq $t0,#$magic - orreq $Ktbl,$Ktbl,#1 - ldr $t2,[sp,#$Boff+0] @ b.lo ldr $t3,[sp,#$Coff+0] @ c.lo + orreq $Ktbl,$Ktbl,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 @@ -131,80 +143,100 @@ $code.=<<___; eor $t0,$t0,$Alo,lsl#25 eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a) adds $Tlo,$Tlo,$t0 + and $t0,$Alo,$t2 adc $Thi,$Thi,$t1 @ T += Sigma0(a) - and $t0,$Alo,$t2 - orr $Alo,$Alo,$t2 ldr $t1,[sp,#$Boff+4] @ b.hi + orr $Alo,$Alo,$t2 ldr $t2,[sp,#$Coff+4] @ c.hi and $Alo,$Alo,$t3 - orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo and $t3,$Ahi,$t1 orr $Ahi,$Ahi,$t1 + orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo and $Ahi,$Ahi,$t2 - orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi adds $Alo,$Alo,$Tlo - adc $Ahi,$Ahi,$Thi @ h += T - + orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi sub sp,sp,#8 + adc $Ahi,$Ahi,$Thi @ h += T + tst $Ktbl,#1 add $Ktbl,$Ktbl,#8 ___ } $code=<<___; +#include "arm_arch.h" +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + .text .code 32 .type K512,%object .align 5 K512: -.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd -.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc -.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 -.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 -.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe -.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 -.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 -.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 -.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 -.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 -.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 -.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 -.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210 -.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 -.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 -.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 -.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 -.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df -.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 -.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b -.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 -.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 -.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 -.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 -.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 -.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 -.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb -.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3 -.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 -.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec -.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 -.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b -.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 -.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 -.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 -.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b -.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 -.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c -.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a -.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add $len,$inp,$len,lsl#7 @ len to point at the end of inp +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif stmdb sp!,{r4-r12,lr} - sub $Ktbl,r3,#640 @ K512 + sub $Ktbl,r3,#672 @ K512 sub sp,sp,#9*8 ldr $Elo,[$ctx,#$Eoff+$lo] @@ -238,6 +270,7 @@ sha512_block_data_order: str $Thi,[sp,#$Foff+4] .L00_15: +#if __ARM_ARCH__<7 ldrb $Tlo,[$inp,#7] ldrb $t0, [$inp,#6] ldrb $t1, [$inp,#5] @@ -252,26 +285,30 @@ sha512_block_data_order: orr $Thi,$Thi,$t3,lsl#8 orr $Thi,$Thi,$t0,lsl#16 orr $Thi,$Thi,$t1,lsl#24 - str $Tlo,[sp,#$Xoff+0] - str $Thi,[sp,#$Xoff+4] +#else + ldr $Tlo,[$inp,#4] + ldr $Thi,[$inp],#8 +#ifdef __ARMEL__ + rev $Tlo,$Tlo + rev $Thi,$Thi +#endif +#endif ___ &BODY_00_15(0x94); $code.=<<___; tst $Ktbl,#1 beq .L00_15 - bic $Ktbl,$Ktbl,#1 - -.L16_79: ldr $t0,[sp,#`$Xoff+8*(16-1)`+0] ldr $t1,[sp,#`$Xoff+8*(16-1)`+4] - ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] - ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] - + bic $Ktbl,$Ktbl,#1 +.L16_79: @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 mov $Tlo,$t0,lsr#1 + ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] mov $Thi,$t1,lsr#1 + ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] eor $Tlo,$Tlo,$t1,lsl#31 eor $Thi,$Thi,$t0,lsl#31 eor $Tlo,$Tlo,$t0,lsr#8 @@ -295,25 +332,24 @@ $code.=<<___; eor $t1,$t1,$t3,lsl#3 eor $t0,$t0,$t2,lsr#6 eor $t1,$t1,$t3,lsr#6 + ldr $t2,[sp,#`$Xoff+8*(16-9)`+0] eor $t0,$t0,$t3,lsl#26 - ldr $t2,[sp,#`$Xoff+8*(16-9)`+0] ldr $t3,[sp,#`$Xoff+8*(16-9)`+4] adds $Tlo,$Tlo,$t0 + ldr $t0,[sp,#`$Xoff+8*16`+0] adc $Thi,$Thi,$t1 - ldr $t0,[sp,#`$Xoff+8*16`+0] ldr $t1,[sp,#`$Xoff+8*16`+4] adds $Tlo,$Tlo,$t2 adc $Thi,$Thi,$t3 adds $Tlo,$Tlo,$t0 adc $Thi,$Thi,$t1 - str $Tlo,[sp,#$Xoff+0] - str $Thi,[sp,#$Xoff+4] ___ &BODY_00_15(0x17); $code.=<<___; - tst $Ktbl,#1 + ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0] + ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4] beq .L16_79 bic $Ktbl,$Ktbl,#1 @@ -324,12 +360,12 @@ $code.=<<___; ldr $t2, [$ctx,#$Boff+$lo] ldr $t3, [$ctx,#$Boff+$hi] adds $t0,$Alo,$t0 - adc $t1,$Ahi,$t1 - adds $t2,$Tlo,$t2 - adc $t3,$Thi,$t3 str $t0, [$ctx,#$Aoff+$lo] + adc $t1,$Ahi,$t1 str $t1, [$ctx,#$Aoff+$hi] + adds $t2,$Tlo,$t2 str $t2, [$ctx,#$Boff+$lo] + adc $t3,$Thi,$t3 str $t3, [$ctx,#$Boff+$hi] ldr $Alo,[sp,#$Coff+0] @@ -341,12 +377,12 @@ $code.=<<___; ldr $t2, [$ctx,#$Doff+$lo] ldr $t3, [$ctx,#$Doff+$hi] adds $t0,$Alo,$t0 - adc $t1,$Ahi,$t1 - adds $t2,$Tlo,$t2 - adc $t3,$Thi,$t3 str $t0, [$ctx,#$Coff+$lo] + adc $t1,$Ahi,$t1 str $t1, [$ctx,#$Coff+$hi] + adds $t2,$Tlo,$t2 str $t2, [$ctx,#$Doff+$lo] + adc $t3,$Thi,$t3 str $t3, [$ctx,#$Doff+$hi] ldr $Tlo,[sp,#$Foff+0] @@ -356,12 +392,12 @@ $code.=<<___; ldr $t2, [$ctx,#$Foff+$lo] ldr $t3, [$ctx,#$Foff+$hi] adds $Elo,$Elo,$t0 - adc $Ehi,$Ehi,$t1 - adds $t2,$Tlo,$t2 - adc $t3,$Thi,$t3 str $Elo,[$ctx,#$Eoff+$lo] + adc $Ehi,$Ehi,$t1 str $Ehi,[$ctx,#$Eoff+$hi] + adds $t2,$Tlo,$t2 str $t2, [$ctx,#$Foff+$lo] + adc $t3,$Thi,$t3 str $t3, [$ctx,#$Foff+$hi] ldr $Alo,[sp,#$Goff+0] @@ -373,12 +409,12 @@ $code.=<<___; ldr $t2, [$ctx,#$Hoff+$lo] ldr $t3, [$ctx,#$Hoff+$hi] adds $t0,$Alo,$t0 - adc $t1,$Ahi,$t1 - adds $t2,$Tlo,$t2 - adc $t3,$Thi,$t3 str $t0, [$ctx,#$Goff+$lo] + adc $t1,$Ahi,$t1 str $t1, [$ctx,#$Goff+$hi] + adds $t2,$Tlo,$t2 str $t2, [$ctx,#$Hoff+$lo] + adc $t3,$Thi,$t3 str $t3, [$ctx,#$Hoff+$hi] add sp,sp,#640 @@ -388,13 +424,156 @@ $code.=<<___; bne .Loop add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) -.size sha512_block_data_order,.-sha512_block_data_order -.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by " +#endif +___ + +{ +my @Sigma0=(28,34,39); +my @Sigma1=(14,18,41); +my @sigma0=(1, 8, 7); +my @sigma1=(19,61,6); + +my $Ktbl="r3"; +my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch + +my @X=map("d$_",(0..15)); +my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23)); + +sub NEON_00_15() { +my $i=shift; +my ($a,$b,$c,$d,$e,$f,$g,$h)=@_; +my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps + +$code.=<<___ if ($i<16 || $i&1); + vshr.u64 $t0,$e,#@Sigma1[0] @ $i +#if $i<16 + vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned +#endif + vshr.u64 $t1,$e,#@Sigma1[1] + vshr.u64 $t2,$e,#@Sigma1[2] +___ +$code.=<<___; + vld1.64 {$K},[$Ktbl,:64]! @ K[i++] + vsli.64 $t0,$e,#`64-@Sigma1[0]` + vsli.64 $t1,$e,#`64-@Sigma1[1]` + vsli.64 $t2,$e,#`64-@Sigma1[2]` +#if $i<16 && defined(__ARMEL__) + vrev64.8 @X[$i],@X[$i] +#endif + vadd.i64 $T1,$K,$h + veor $Ch,$f,$g + veor $t0,$t1 + vand $Ch,$e + veor $t0,$t2 @ Sigma1(e) + veor $Ch,$g @ Ch(e,f,g) + vadd.i64 $T1,$t0 + vshr.u64 $t0,$a,#@Sigma0[0] + vadd.i64 $T1,$Ch + vshr.u64 $t1,$a,#@Sigma0[1] + vshr.u64 $t2,$a,#@Sigma0[2] + vsli.64 $t0,$a,#`64-@Sigma0[0]` + vsli.64 $t1,$a,#`64-@Sigma0[1]` + vsli.64 $t2,$a,#`64-@Sigma0[2]` + vadd.i64 $T1,@X[$i%16] + vorr $Maj,$a,$c + vand $Ch,$a,$c + veor $h,$t0,$t1 + vand $Maj,$b + veor $h,$t2 @ Sigma0(a) + vorr $Maj,$Ch @ Maj(a,b,c) + vadd.i64 $h,$T1 + vadd.i64 $d,$T1 + vadd.i64 $h,$Maj +___ +} + +sub NEON_16_79() { +my $i=shift; + +if ($i&1) { &NEON_00_15($i,@_); return; } + +# 2x-vectorized, therefore runs every 2nd round +my @X=map("q$_",(0..7)); # view @X as 128-bit vector +my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps +my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15 +my $e=@_[4]; # $e from NEON_00_15 +$i /= 2; +$code.=<<___; + vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0] + vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1] + vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2] + vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]` + vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1] + vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]` + veor $s1,$t0 + vshr.u64 $t0,$s0,#@sigma0[0] + veor $s1,$t1 @ sigma1(X[i+14]) + vshr.u64 $t1,$s0,#@sigma0[1] + vadd.i64 @X[$i%8],$s1 + vshr.u64 $s1,$s0,#@sigma0[2] + vsli.64 $t0,$s0,#`64-@sigma0[0]` + vsli.64 $t1,$s0,#`64-@sigma0[1]` + vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9] + veor $s1,$t0 + vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s0 + vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15 + veor $s1,$t1 @ sigma0(X[i+1]) + vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s1 +___ + &NEON_00_15(2*$i,@_); +} + +$code.=<<___; +#if __ARM_ARCH__>=7 +.fpu neon + +.align 4 +.LNEON: + dmb @ errata #451034 on early Cortex A8 + vstmdb sp!,{d8-d15} @ ABI specification says so + sub $Ktbl,r3,#672 @ K512 + vldmia $ctx,{$A-$H} @ load context +.Loop_neon: +___ +for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mov $cnt,#4 +.L16_79_neon: + subs $cnt,#1 +___ +for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bne .L16_79_neon + + vldmia $ctx,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia $ctx,{$A-$H} @ save context + teq $inp,$len + sub $Ktbl,#640 @ rewind K512 + bne .Loop_neon + + vldmia sp!,{d8-d15} @ epilogue + bx lr +#endif +___ +} +$code.=<<___; +.size sha512_block_data_order,.-sha512_block_data_order +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 +.comm OPENSSL_armcap_P,4,4 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.s b/app/openssl/crypto/sha/asm/sha512-armv4.s index b030c16c..57301922 100644 --- a/app/openssl/crypto/sha/asm/sha512-armv4.s +++ b/app/openssl/crypto/sha/asm/sha512-armv4.s @@ -1,90 +1,111 @@ +#include "arm_arch.h" +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + .text .code 32 .type K512,%object .align 5 K512: -.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd -.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc -.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 -.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 -.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe -.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 -.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 -.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 -.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 -.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 -.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 -.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 -.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210 -.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 -.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 -.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 -.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 -.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df -.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 -.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b -.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 -.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 -.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 -.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 -.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 -.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 -.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb -.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3 -.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 -.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec -.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 -.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b -.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 -.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 -.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 -.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b -.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 -.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c -.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a -.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add r2,r1,r2,lsl#7 @ len to point at the end of inp +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif stmdb sp!,{r4-r12,lr} - sub r14,r3,#640 @ K512 + sub r14,r3,#672 @ K512 sub sp,sp,#9*8 - ldr r7,[r0,#32+4] - ldr r8,[r0,#32+0] - ldr r9, [r0,#48+4] - ldr r10, [r0,#48+0] - ldr r11, [r0,#56+4] - ldr r12, [r0,#56+0] + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] .Loop: str r9, [sp,#48+0] str r10, [sp,#48+4] str r11, [sp,#56+0] str r12, [sp,#56+4] - ldr r5,[r0,#0+4] - ldr r6,[r0,#0+0] - ldr r3,[r0,#8+4] - ldr r4,[r0,#8+0] - ldr r9, [r0,#16+4] - ldr r10, [r0,#16+0] - ldr r11, [r0,#24+4] - ldr r12, [r0,#24+0] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] str r3,[sp,#8+0] str r4,[sp,#8+4] str r9, [sp,#16+0] str r10, [sp,#16+4] str r11, [sp,#24+0] str r12, [sp,#24+4] - ldr r3,[r0,#40+4] - ldr r4,[r0,#40+0] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] str r3,[sp,#40+0] str r4,[sp,#40+4] .L00_15: +#if __ARM_ARCH__<7 ldrb r3,[r1,#7] ldrb r9, [r1,#6] ldrb r10, [r1,#5] @@ -99,17 +120,25 @@ sha512_block_data_order: orr r4,r4,r12,lsl#8 orr r4,r4,r9,lsl#16 orr r4,r4,r10,lsl#24 - str r3,[sp,#64+0] - str r4,[sp,#64+4] - ldr r11,[sp,#56+0] @ h.lo - ldr r12,[sp,#56+4] @ h.hi +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 mov r9,r7,lsr#14 + str r3,[sp,#64+0] mov r10,r8,lsr#14 + str r4,[sp,#64+4] eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi eor r9,r9,r7,lsr#18 eor r10,r10,r8,lsr#18 eor r9,r9,r8,lsl#14 @@ -136,25 +165,24 @@ sha512_block_data_order: and r10,r10,r8 str r6,[sp,#0+4] eor r9,r9,r11 - ldr r11,[r14,#4] @ K[i].lo + ldr r11,[r14,#LO] @ K[i].lo eor r10,r10,r12 @ Ch(e,f,g) - ldr r12,[r14,#0] @ K[i].hi + ldr r12,[r14,#HI] @ K[i].hi adds r3,r3,r9 ldr r7,[sp,#24+0] @ d.lo adc r4,r4,r10 @ T += Ch(e,f,g) ldr r8,[sp,#24+4] @ d.hi adds r3,r3,r11 + and r9,r11,#0xff adc r4,r4,r12 @ T += K[i] adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo adc r8,r8,r4 @ d += T - - and r9,r11,#0xff teq r9,#148 - orreq r14,r14,#1 - ldr r11,[sp,#8+0] @ b.lo ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 @@ -171,38 +199,36 @@ sha512_block_data_order: eor r9,r9,r5,lsl#25 eor r10,r10,r6,lsl#25 @ Sigma0(a) adds r3,r3,r9 + and r9,r5,r11 adc r4,r4,r10 @ T += Sigma0(a) - and r9,r5,r11 - orr r5,r5,r11 ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 ldr r11,[sp,#16+4] @ c.hi and r5,r5,r12 - orr r5,r5,r9 @ Maj(a,b,c).lo and r12,r6,r10 orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo and r6,r6,r11 - orr r6,r6,r12 @ Maj(a,b,c).hi adds r5,r5,r3 - adc r6,r6,r4 @ h += T - + orr r6,r6,r12 @ Maj(a,b,c).hi sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 add r14,r14,#8 tst r14,#1 beq .L00_15 - bic r14,r14,#1 - -.L16_79: ldr r9,[sp,#184+0] ldr r10,[sp,#184+4] - ldr r11,[sp,#80+0] - ldr r12,[sp,#80+4] - + bic r14,r14,#1 +.L16_79: @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] eor r3,r3,r10,lsl#31 eor r4,r4,r9,lsl#31 eor r3,r3,r9,lsr#8 @@ -226,30 +252,30 @@ sha512_block_data_order: eor r10,r10,r12,lsl#3 eor r9,r9,r11,lsr#6 eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] eor r9,r9,r12,lsl#26 - ldr r11,[sp,#120+0] ldr r12,[sp,#120+4] adds r3,r3,r9 + ldr r9,[sp,#192+0] adc r4,r4,r10 - ldr r9,[sp,#192+0] ldr r10,[sp,#192+4] adds r3,r3,r11 adc r4,r4,r12 adds r3,r3,r9 adc r4,r4,r10 - str r3,[sp,#64+0] - str r4,[sp,#64+4] - ldr r11,[sp,#56+0] @ h.lo - ldr r12,[sp,#56+4] @ h.hi @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 mov r9,r7,lsr#14 + str r3,[sp,#64+0] mov r10,r8,lsr#14 + str r4,[sp,#64+4] eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi eor r9,r9,r7,lsr#18 eor r10,r10,r8,lsr#18 eor r9,r9,r8,lsl#14 @@ -276,25 +302,24 @@ sha512_block_data_order: and r10,r10,r8 str r6,[sp,#0+4] eor r9,r9,r11 - ldr r11,[r14,#4] @ K[i].lo + ldr r11,[r14,#LO] @ K[i].lo eor r10,r10,r12 @ Ch(e,f,g) - ldr r12,[r14,#0] @ K[i].hi + ldr r12,[r14,#HI] @ K[i].hi adds r3,r3,r9 ldr r7,[sp,#24+0] @ d.lo adc r4,r4,r10 @ T += Ch(e,f,g) ldr r8,[sp,#24+4] @ d.hi adds r3,r3,r11 + and r9,r11,#0xff adc r4,r4,r12 @ T += K[i] adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo adc r8,r8,r4 @ d += T - - and r9,r11,#0xff teq r9,#23 - orreq r14,r14,#1 - ldr r11,[sp,#8+0] @ b.lo ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 @@ -311,90 +336,91 @@ sha512_block_data_order: eor r9,r9,r5,lsl#25 eor r10,r10,r6,lsl#25 @ Sigma0(a) adds r3,r3,r9 + and r9,r5,r11 adc r4,r4,r10 @ T += Sigma0(a) - and r9,r5,r11 - orr r5,r5,r11 ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 ldr r11,[sp,#16+4] @ c.hi and r5,r5,r12 - orr r5,r5,r9 @ Maj(a,b,c).lo and r12,r6,r10 orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo and r6,r6,r11 - orr r6,r6,r12 @ Maj(a,b,c).hi adds r5,r5,r3 - adc r6,r6,r4 @ h += T - + orr r6,r6,r12 @ Maj(a,b,c).hi sub sp,sp,#8 - add r14,r14,#8 + adc r6,r6,r4 @ h += T tst r14,#1 + add r14,r14,#8 + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] beq .L16_79 bic r14,r14,#1 ldr r3,[sp,#8+0] ldr r4,[sp,#8+4] - ldr r9, [r0,#0+4] - ldr r10, [r0,#0+0] - ldr r11, [r0,#8+4] - ldr r12, [r0,#8+0] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] adds r9,r5,r9 + str r9, [r0,#0+LO] adc r10,r6,r10 + str r10, [r0,#0+HI] adds r11,r3,r11 + str r11, [r0,#8+LO] adc r12,r4,r12 - str r9, [r0,#0+4] - str r10, [r0,#0+0] - str r11, [r0,#8+4] - str r12, [r0,#8+0] + str r12, [r0,#8+HI] ldr r5,[sp,#16+0] ldr r6,[sp,#16+4] ldr r3,[sp,#24+0] ldr r4,[sp,#24+4] - ldr r9, [r0,#16+4] - ldr r10, [r0,#16+0] - ldr r11, [r0,#24+4] - ldr r12, [r0,#24+0] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] adds r9,r5,r9 + str r9, [r0,#16+LO] adc r10,r6,r10 + str r10, [r0,#16+HI] adds r11,r3,r11 + str r11, [r0,#24+LO] adc r12,r4,r12 - str r9, [r0,#16+4] - str r10, [r0,#16+0] - str r11, [r0,#24+4] - str r12, [r0,#24+0] + str r12, [r0,#24+HI] ldr r3,[sp,#40+0] ldr r4,[sp,#40+4] - ldr r9, [r0,#32+4] - ldr r10, [r0,#32+0] - ldr r11, [r0,#40+4] - ldr r12, [r0,#40+0] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] adds r7,r7,r9 + str r7,[r0,#32+LO] adc r8,r8,r10 + str r8,[r0,#32+HI] adds r11,r3,r11 + str r11, [r0,#40+LO] adc r12,r4,r12 - str r7,[r0,#32+4] - str r8,[r0,#32+0] - str r11, [r0,#40+4] - str r12, [r0,#40+0] + str r12, [r0,#40+HI] ldr r5,[sp,#48+0] ldr r6,[sp,#48+4] ldr r3,[sp,#56+0] ldr r4,[sp,#56+4] - ldr r9, [r0,#48+4] - ldr r10, [r0,#48+0] - ldr r11, [r0,#56+4] - ldr r12, [r0,#56+0] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] adds r9,r5,r9 + str r9, [r0,#48+LO] adc r10,r6,r10 + str r10, [r0,#48+HI] adds r11,r3,r11 + str r11, [r0,#56+LO] adc r12,r4,r12 - str r9, [r0,#48+4] - str r10, [r0,#48+0] - str r11, [r0,#56+4] - str r12, [r0,#56+0] + str r12, [r0,#56+HI] add sp,sp,#640 sub r14,r14,#640 @@ -403,10 +429,1355 @@ sha512_block_data_order: bne .Loop add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else ldmia sp!,{r4-r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) -.size sha512_block_data_order,.-sha512_block_data_order -.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by " +#endif +#if __ARM_ARCH__>=7 +.fpu neon + +.align 4 +.LNEON: + dmb @ errata #451034 on early Cortex A8 + vstmdb sp!,{d8-d15} @ ABI specification says so + sub r3,r3,#672 @ K512 + vldmia r0,{d16-d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d0 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d1 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d2 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d3 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d4 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d5 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d6 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d7 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d8 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d9 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d10 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d11 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d12 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d13 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d14 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d15 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d0 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d1 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d2 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d3 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d4 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d5 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d6 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d7 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d8 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d9 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d10 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d11 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d12 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d13 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d14 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d15 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + bne .L16_79_neon + + vldmia r0,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16-d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + vldmia sp!,{d8-d15} @ epilogue + .word 0xe12fff1e +#endif +.size sha512_block_data_order,.-sha512_block_data_order +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/app/openssl/crypto/sha/asm/sha512-mips.pl b/app/openssl/crypto/sha/asm/sha512-mips.pl new file mode 100644 index 00000000..ffa053bb --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-mips.pl @@ -0,0 +1,455 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA2 block procedures for MIPS. + +# October 2010. +# +# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc- +# generated code in o32 build and ~55% in n32/64 build. SHA512 [which +# for now can only be compiled for MIPS64 ISA] improvement is modest +# ~17%, but it comes for free, because it's same instruction sequence. +# Improvement coefficients are for aligned input. + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp [o32 can be +# excluded from the rule, because it's specified volatile]; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $PTR_SLL="dsll"; # incidentally works even on n32 + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $PTR_SLL="sll"; + $SZREG=4; +} +$pf = ($flavour =~ /nubi/i) ? $t0 : $t2; +# +# +# +###################################################################### + +$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; + +for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); } + +if ($output =~ /512/) { + $label="512"; + $SZ=8; + $LD="ld"; # load from memory + $ST="sd"; # store to memory + $SLL="dsll"; # shift left logical + $SRL="dsrl"; # shift right logical + $ADDU="daddu"; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=( 7, 1, 8); # right shift first + @sigma1=( 6,19,61); # right shift first + $lastK=0x817; + $rounds=80; +} else { + $label="256"; + $SZ=4; + $LD="lw"; # load from memory + $ST="sw"; # store to memory + $SLL="sll"; # shift left logical + $SRL="srl"; # shift right logical + $ADDU="addu"; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 3, 7,18); # right shift first + @sigma1=(10,17,19); # right shift first + $lastK=0x8f2; + $rounds=64; +} + +$MSB = $big_endian ? 0 : ($SZ-1); +$LSB = ($SZ-1)&~$MSB; + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31)); +@X=map("\$$_",(8..23)); + +$ctx=$a0; +$inp=$a1; +$len=$a2; $Ktbl=$len; + +sub BODY_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]); + +$code.=<<___ if ($i<15); + ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp) + ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) +___ +$code.=<<___ if (!$big_endian && $i<16 && $SZ==4); + srl $tmp0,@X[0],24 # byte swap($i) + srl $tmp1,@X[0],8 + andi $tmp2,@X[0],0xFF00 + sll @X[0],@X[0],24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or @X[0],$tmp0 + or $tmp1,$tmp2 + or @X[0],$tmp1 +___ +$code.=<<___ if (!$big_endian && $i<16 && $SZ==8); + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + and $tmp1,@X[0],$tmp0 # byte swap($i) + dsrl $tmp2,@X[0],24 + dsll $tmp1,24 + and $tmp2,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + and $tmp2,@X[0],$tmp0 + dsrl @X[0],8 + dsll $tmp2,8 + and @X[0],$tmp0 + or $tmp1,$tmp2 + or @X[0],$tmp1 + dsrl $tmp1,@X[0],32 + dsll @X[0],32 + or @X[0],$tmp1 +___ +$code.=<<___; + $ADDU $T1,$X[0],$h # $i + $SRL $h,$e,@Sigma1[0] + xor $tmp2,$f,$g + $SLL $tmp1,$e,`$SZ*8-@Sigma1[2]` + and $tmp2,$e + $SRL $tmp0,$e,@Sigma1[1] + xor $h,$tmp1 + $SLL $tmp1,$e,`$SZ*8-@Sigma1[1]` + xor $h,$tmp0 + $SRL $tmp0,$e,@Sigma1[2] + xor $h,$tmp1 + $SLL $tmp1,$e,`$SZ*8-@Sigma1[0]` + xor $h,$tmp0 + xor $tmp2,$g # Ch(e,f,g) + xor $tmp0,$tmp1,$h # Sigma1(e) + + $SRL $h,$a,@Sigma0[0] + $ADDU $T1,$tmp2 + $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i] + $SLL $tmp1,$a,`$SZ*8-@Sigma0[2]` + $ADDU $T1,$tmp0 + $SRL $tmp0,$a,@Sigma0[1] + xor $h,$tmp1 + $SLL $tmp1,$a,`$SZ*8-@Sigma0[1]` + xor $h,$tmp0 + $SRL $tmp0,$a,@Sigma0[2] + xor $h,$tmp1 + $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` + xor $h,$tmp0 + $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + xor $h,$tmp1 # Sigma0(a) + + or $tmp0,$a,$b + and $tmp1,$a,$b + and $tmp0,$c + or $tmp1,$tmp0 # Maj(a,b,c) + $ADDU $T1,$tmp2 # +=K[$i] + $ADDU $h,$tmp1 + + $ADDU $d,$T1 + $ADDU $h,$T1 +___ +$code.=<<___ if ($i>=13); + $LD @X[3],`(($i+3)%16)*$SZ`($sp) # prefetch from ring buffer +___ +} + +sub BODY_16_XX { +my $i=@_[0]; +my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); + +$code.=<<___; + $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) + $ADDU @X[0],@X[9] # +=X[i+9] + $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` + $SRL $tmp0,@X[1],@sigma0[1] + xor $tmp2,$tmp1 + $SLL $tmp1,`@sigma0[2]-@sigma0[1]` + xor $tmp2,$tmp0 + $SRL $tmp0,@X[1],@sigma0[2] + xor $tmp2,$tmp1 + + $SRL $tmp3,@X[14],@sigma1[0] + xor $tmp2,$tmp0 # sigma0(X[i+1]) + $SLL $tmp1,@X[14],`$SZ*8-@sigma1[2]` + $ADDU @X[0],$tmp2 + $SRL $tmp0,@X[14],@sigma1[1] + xor $tmp3,$tmp1 + $SLL $tmp1,`@sigma1[2]-@sigma1[1]` + xor $tmp3,$tmp0 + $SRL $tmp0,@X[14],@sigma1[2] + xor $tmp3,$tmp1 + + xor $tmp3,$tmp0 # sigma1(X[i+14]) + $ADDU @X[0],$tmp3 +___ + &BODY_00_15(@_); +} + +$FRAMESIZE=16*$SZ+16*$SZREG; +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; + +$code.=<<___; +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +.text +.set noat +#if !defined(__vxworks) || defined(__pic__) +.option pic2 +#endif + +.align 5 +.globl sha${label}_block_data_order +.ent sha${label}_block_data_order +sha${label}_block_data_order: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_S $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_S $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_S $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + $PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)` +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Ktbl + .cpsetup $pf,$zero,sha${label}_block_data_order +___ +$code.=<<___; + .set reorder + la $Ktbl,K${label} # PIC-ified 'load address' + + $LD $A,0*$SZ($ctx) # load context + $LD $B,1*$SZ($ctx) + $LD $C,2*$SZ($ctx) + $LD $D,3*$SZ($ctx) + $LD $E,4*$SZ($ctx) + $LD $F,5*$SZ($ctx) + $LD $G,6*$SZ($ctx) + $LD $H,7*$SZ($ctx) + + $PTR_ADD @X[15],$inp # pointer to the end of input + $REG_S @X[15],16*$SZ($sp) + b .Loop + +.align 5 +.Loop: + ${LD}l @X[0],$MSB($inp) + ${LD}r @X[0],$LSB($inp) +___ +for ($i=0;$i<16;$i++) +{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } +$code.=<<___; + b .L16_xx +.align 4 +.L16_xx: +___ +for (;$i<32;$i++) +{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } +$code.=<<___; + and @X[6],0xfff + li @X[7],$lastK + .set noreorder + bne @X[6],@X[7],.L16_xx + $PTR_ADD $Ktbl,16*$SZ # Ktbl+=16 + + $REG_L @X[15],16*$SZ($sp) # restore pointer to the end of input + $LD @X[0],0*$SZ($ctx) + $LD @X[1],1*$SZ($ctx) + $LD @X[2],2*$SZ($ctx) + $PTR_ADD $inp,16*$SZ + $LD @X[3],3*$SZ($ctx) + $ADDU $A,@X[0] + $LD @X[4],4*$SZ($ctx) + $ADDU $B,@X[1] + $LD @X[5],5*$SZ($ctx) + $ADDU $C,@X[2] + $LD @X[6],6*$SZ($ctx) + $ADDU $D,@X[3] + $LD @X[7],7*$SZ($ctx) + $ADDU $E,@X[4] + $ST $A,0*$SZ($ctx) + $ADDU $F,@X[5] + $ST $B,1*$SZ($ctx) + $ADDU $G,@X[6] + $ST $C,2*$SZ($ctx) + $ADDU $H,@X[7] + $ST $D,3*$SZ($ctx) + $ST $E,4*$SZ($ctx) + $ST $F,5*$SZ($ctx) + $ST $G,6*$SZ($ctx) + $ST $H,7*$SZ($ctx) + + bne $inp,@X[15],.Loop + $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl + + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end sha${label}_block_data_order + +.rdata +.align 5 +K${label}: +___ +if ($SZ==4) { +$code.=<<___; + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +___ +} else { +$code.=<<___; + .dword 0x428a2f98d728ae22, 0x7137449123ef65cd + .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc + .dword 0x3956c25bf348b538, 0x59f111f1b605d019 + .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 + .dword 0xd807aa98a3030242, 0x12835b0145706fbe + .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 + .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 + .dword 0x9bdc06a725c71235, 0xc19bf174cf692694 + .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 + .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 + .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 + .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 + .dword 0x983e5152ee66dfab, 0xa831c66d2db43210 + .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4 + .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725 + .dword 0x06ca6351e003826f, 0x142929670a0e6e70 + .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926 + .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df + .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8 + .dword 0x81c2c92e47edaee6, 0x92722c851482353b + .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001 + .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30 + .dword 0xd192e819d6ef5218, 0xd69906245565a910 + .dword 0xf40e35855771202a, 0x106aa07032bbd1b8 + .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 + .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 + .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb + .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 + .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60 + .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec + .dword 0x90befffa23631e28, 0xa4506cebde82bde9 + .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b + .dword 0xca273eceea26619c, 0xd186b8c721c0c207 + .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 + .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6 + .dword 0x113f9804bef90dae, 0x1b710b35131c471b + .dword 0x28db77f523047d84, 0x32caab7b40c72493 + .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c + .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a + .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 +___ +} +$code.=<<___; +.asciiz "SHA${label} for MIPS, CRYPTOGAMS by " +.align 5 + +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha512-parisc.pl b/app/openssl/crypto/sha/asm/sha512-parisc.pl new file mode 100755 index 00000000..fc0e15b3 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-parisc.pl @@ -0,0 +1,793 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA256/512 block procedure for PA-RISC. + +# June 2009. +# +# SHA256 performance is >75% better than gcc 3.2 generated code on +# PA-7100LC. Compared to code generated by vendor compiler this +# implementation is almost 70% faster in 64-bit build, but delivers +# virtually same performance in 32-bit build on PA-8600. +# +# SHA512 performance is >2.9x better than gcc 3.2 generated code on +# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the +# code is executed on PA-RISC 2.0 processor and switches to 64-bit +# code path delivering adequate peformance even in "blended" 32-bit +# build. Though 64-bit code is not any faster than code generated by +# vendor compiler on PA-8600... +# +# Special thanks to polarhome.com for providing HP-UX account. + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; +} else { + $LEVEL ="1.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; +} + +if ($output =~ /512/) { + $func="sha512_block_data_order"; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; + $LAST10BITS=0x017; + $LD="ldd"; + $LDM="ldd,ma"; + $ST="std"; +} else { + $func="sha256_block_data_order"; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; + $LAST10BITS=0x0f2; + $LD="ldw"; + $LDM="ldwm"; + $ST="stw"; +} + +$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker + # [+ argument transfer] +$XOFF=16*$SZ+32; # local variables +$FRAME+=$XOFF; +$XOFF+=$FRAME_MARKER; # distance between %sp and local variables + +$ctx="%r26"; # zapped by $a0 +$inp="%r25"; # zapped by $a1 +$num="%r24"; # zapped by $t0 + +$a0 ="%r26"; +$a1 ="%r25"; +$t0 ="%r24"; +$t1 ="%r29"; +$Tbl="%r31"; + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28"); + +@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", + "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp); + +sub ROUND_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +$code.=<<___; + _ror $e,$Sigma1[0],$a0 + and $f,$e,$t0 + _ror $e,$Sigma1[1],$a1 + addl $t1,$h,$h + andcm $g,$e,$t1 + xor $a1,$a0,$a0 + _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1 + or $t0,$t1,$t1 ; Ch(e,f,g) + addl @X[$i%16],$h,$h + xor $a0,$a1,$a1 ; Sigma1(e) + addl $t1,$h,$h + _ror $a,$Sigma0[0],$a0 + addl $a1,$h,$h + + _ror $a,$Sigma0[1],$a1 + and $a,$b,$t0 + and $a,$c,$t1 + xor $a1,$a0,$a0 + _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1 + xor $t1,$t0,$t0 + and $b,$c,$t1 + xor $a0,$a1,$a1 ; Sigma0(a) + addl $h,$d,$d + xor $t1,$t0,$t0 ; Maj(a,b,c) + `"$LDM $SZ($Tbl),$t1" if ($i<15)` + addl $a1,$h,$h + addl $t0,$h,$h + +___ +} + +sub ROUND_16_xx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +$i-=16; +$code.=<<___; + _ror @X[($i+1)%16],$sigma0[0],$a0 + _ror @X[($i+1)%16],$sigma0[1],$a1 + addl @X[($i+9)%16],@X[$i],@X[$i] + _ror @X[($i+14)%16],$sigma1[0],$t0 + _ror @X[($i+14)%16],$sigma1[1],$t1 + xor $a1,$a0,$a0 + _shr @X[($i+1)%16],$sigma0[2],$a1 + xor $t1,$t0,$t0 + _shr @X[($i+14)%16],$sigma1[2],$t1 + xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f]) + xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f]) + $LDM $SZ($Tbl),$t1 + addl $a0,@X[$i],@X[$i] + addl $t0,@X[$i],@X[$i] +___ +$code.=<<___ if ($i==15); + extru $t1,31,10,$a1 + comiclr,<> $LAST10BITS,$a1,%r0 + ldo 1($Tbl),$Tbl ; signal end of $Tbl +___ +&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); +} + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .ALIGN 64 +L\$table +___ +$code.=<<___ if ($SZ==8); + .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd + .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc + .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 + .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 + .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe + .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 + .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 + .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 + .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 + .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 + .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 + .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 + .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 + .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 + .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 + .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 + .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 + .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df + .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 + .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b + .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 + .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 + .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 + .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 + .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 + .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 + .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb + .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 + .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 + .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec + .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 + .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b + .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 + .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 + .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 + .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b + .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 + .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c + .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a + .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 +___ +$code.=<<___ if ($SZ==4); + .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +___ +$code.=<<___; + + .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR + .ALIGN 64 +$func + .PROC + .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) + $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) + $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) + $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) + $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) + $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) + $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) + $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) + + _shl $num,`log(16*$SZ)/log(2)`,$num + addl $inp,$num,$num ; $num to point at the end of $inp + + $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments + $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) + $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) + + blr %r0,$Tbl + ldi 3,$t1 +L\$pic + andcm $Tbl,$t1,$Tbl ; wipe privilege level + ldo L\$table-L\$pic($Tbl),$Tbl +___ +$code.=<<___ if ($SZ==8 && $SIZE_T==4); + ldi 31,$t1 + mtctl $t1,%cr11 + extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0 + b L\$parisc1 + nop +___ +$code.=<<___; + $LD `0*$SZ`($ctx),$A ; load context + $LD `1*$SZ`($ctx),$B + $LD `2*$SZ`($ctx),$C + $LD `3*$SZ`($ctx),$D + $LD `4*$SZ`($ctx),$E + $LD `5*$SZ`($ctx),$F + $LD `6*$SZ`($ctx),$G + $LD `7*$SZ`($ctx),$H + + extru $inp,31,`log($SZ)/log(2)`,$t0 + sh3addl $t0,%r0,$t0 + subi `8*$SZ`,$t0,$t0 + mtctl $t0,%cr11 ; load %sar with align factor + +L\$oop + ldi `$SZ-1`,$t0 + $LDM $SZ($Tbl),$t1 + andcm $inp,$t0,$t0 ; align $inp +___ + for ($i=0;$i<15;$i++) { # load input block + $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } +$code.=<<___; + cmpb,*= $inp,$t0,L\$aligned + $LD `$SZ*15`($t0),@X[15] + $LD `$SZ*16`($t0),@X[16] +___ + for ($i=0;$i<16;$i++) { # align data + $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } +$code.=<<___; +L\$aligned + nop ; otherwise /usr/ccs/bin/as is confused by below .WORD +___ + +for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +L\$rounds + nop ; otherwise /usr/ccs/bin/as is confused by below .WORD +___ +for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled? + nop + + $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments + $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp + $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num + ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl + + $LD `0*$SZ`($ctx),@X[0] ; load context + $LD `1*$SZ`($ctx),@X[1] + $LD `2*$SZ`($ctx),@X[2] + $LD `3*$SZ`($ctx),@X[3] + $LD `4*$SZ`($ctx),@X[4] + $LD `5*$SZ`($ctx),@X[5] + addl @X[0],$A,$A + $LD `6*$SZ`($ctx),@X[6] + addl @X[1],$B,$B + $LD `7*$SZ`($ctx),@X[7] + ldo `16*$SZ`($inp),$inp ; advance $inp + + $ST $A,`0*$SZ`($ctx) ; save context + addl @X[2],$C,$C + $ST $B,`1*$SZ`($ctx) + addl @X[3],$D,$D + $ST $C,`2*$SZ`($ctx) + addl @X[4],$E,$E + $ST $D,`3*$SZ`($ctx) + addl @X[5],$F,$F + $ST $E,`4*$SZ`($ctx) + addl @X[6],$G,$G + $ST $F,`5*$SZ`($ctx) + addl @X[7],$H,$H + $ST $G,`6*$SZ`($ctx) + $ST $H,`7*$SZ`($ctx) + + cmpb,*<>,n $inp,$num,L\$oop + $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp +___ +if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0 +{{ +$code.=<<___; + b L\$done + nop + + .ALIGN 64 +L\$parisc1 +___ + +@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo, + $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = + ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", + "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); +$a0 ="%r17"; +$a1 ="%r18"; +$a2 ="%r19"; +$a3 ="%r20"; +$t0 ="%r21"; +$t1 ="%r22"; +$t2 ="%r28"; +$t3 ="%r29"; +$Tbl="%r31"; + +@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx + +sub ROUND_00_15_pa1 { +my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, + $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; +my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; + +$code.=<<___ if (!$flag); + ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi + ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] +___ +$code.=<<___; + shd $ehi,$elo,$Sigma1[0],$t0 + add $Xlo,$hlo,$hlo + shd $elo,$ehi,$Sigma1[0],$t1 + addc $Xhi,$hhi,$hhi ; h += X[i] + shd $ehi,$elo,$Sigma1[1],$t2 + ldwm 8($Tbl),$Xhi + shd $elo,$ehi,$Sigma1[1],$t3 + ldw -4($Tbl),$Xlo ; load K[i] + xor $t2,$t0,$t0 + xor $t3,$t1,$t1 + and $flo,$elo,$a0 + and $fhi,$ehi,$a1 + shd $ehi,$elo,$Sigma1[2],$t2 + andcm $glo,$elo,$a2 + shd $elo,$ehi,$Sigma1[2],$t3 + andcm $ghi,$ehi,$a3 + xor $t2,$t0,$t0 + xor $t3,$t1,$t1 ; Sigma1(e) + add $Xlo,$hlo,$hlo + xor $a2,$a0,$a0 + addc $Xhi,$hhi,$hhi ; h += K[i] + xor $a3,$a1,$a1 ; Ch(e,f,g) + + add $t0,$hlo,$hlo + shd $ahi,$alo,$Sigma0[0],$t0 + addc $t1,$hhi,$hhi ; h += Sigma1(e) + shd $alo,$ahi,$Sigma0[0],$t1 + add $a0,$hlo,$hlo + shd $ahi,$alo,$Sigma0[1],$t2 + addc $a1,$hhi,$hhi ; h += Ch(e,f,g) + shd $alo,$ahi,$Sigma0[1],$t3 + + xor $t2,$t0,$t0 + xor $t3,$t1,$t1 + shd $ahi,$alo,$Sigma0[2],$t2 + and $alo,$blo,$a0 + shd $alo,$ahi,$Sigma0[2],$t3 + and $ahi,$bhi,$a1 + xor $t2,$t0,$t0 + xor $t3,$t1,$t1 ; Sigma0(a) + + and $alo,$clo,$a2 + and $ahi,$chi,$a3 + xor $a2,$a0,$a0 + add $hlo,$dlo,$dlo + xor $a3,$a1,$a1 + addc $hhi,$dhi,$dhi ; d += h + and $blo,$clo,$a2 + add $t0,$hlo,$hlo + and $bhi,$chi,$a3 + addc $t1,$hhi,$hhi ; h += Sigma0(a) + xor $a2,$a0,$a0 + add $a0,$hlo,$hlo + xor $a3,$a1,$a1 ; Maj(a,b,c) + addc $a1,$hhi,$hhi ; h += Maj(a,b,c) + +___ +$code.=<<___ if ($i==15 && $flag); + extru $Xlo,31,10,$Xlo + comiclr,= $LAST10BITS,$Xlo,%r0 + b L\$rounds_pa1 + nop +___ +push(@X,shift(@X)); push(@X,shift(@X)); +} + +sub ROUND_16_xx_pa1 { +my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; +my ($i)=shift; +$i-=16; +$code.=<<___; + ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi + ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] + ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1 + ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9] + ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3 + ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14] + shd $Xnhi,$Xnlo,$sigma0[0],$t0 + shd $Xnlo,$Xnhi,$sigma0[0],$t1 + add $a0,$Xlo,$Xlo + shd $Xnhi,$Xnlo,$sigma0[1],$t2 + addc $a1,$Xhi,$Xhi + shd $Xnlo,$Xnhi,$sigma0[1],$t3 + xor $t2,$t0,$t0 + shd $Xnhi,$Xnlo,$sigma0[2],$t2 + xor $t3,$t1,$t1 + extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3 + xor $t2,$t0,$t0 + shd $a3,$a2,$sigma1[0],$a0 + xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f]) + shd $a2,$a3,$sigma1[0],$a1 + add $t0,$Xlo,$Xlo + shd $a3,$a2,$sigma1[1],$t2 + addc $t1,$Xhi,$Xhi + shd $a2,$a3,$sigma1[1],$t3 + xor $t2,$a0,$a0 + shd $a3,$a2,$sigma1[2],$t2 + xor $t3,$a1,$a1 + extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3 + xor $t2,$a0,$a0 + xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f]) + add $a0,$Xlo,$Xlo + addc $a1,$Xhi,$Xhi + + stw $Xhi,`-$XOFF+8*($i%16)`(%sp) + stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp) +___ +&ROUND_00_15_pa1($i,@_,1); +} +$code.=<<___; + ldw `0*4`($ctx),$Ahi ; load context + ldw `1*4`($ctx),$Alo + ldw `2*4`($ctx),$Bhi + ldw `3*4`($ctx),$Blo + ldw `4*4`($ctx),$Chi + ldw `5*4`($ctx),$Clo + ldw `6*4`($ctx),$Dhi + ldw `7*4`($ctx),$Dlo + ldw `8*4`($ctx),$Ehi + ldw `9*4`($ctx),$Elo + ldw `10*4`($ctx),$Fhi + ldw `11*4`($ctx),$Flo + ldw `12*4`($ctx),$Ghi + ldw `13*4`($ctx),$Glo + ldw `14*4`($ctx),$Hhi + ldw `15*4`($ctx),$Hlo + + extru $inp,31,2,$t0 + sh3addl $t0,%r0,$t0 + subi 32,$t0,$t0 + mtctl $t0,%cr11 ; load %sar with align factor + +L\$oop_pa1 + extru $inp,31,2,$a3 + comib,= 0,$a3,L\$aligned_pa1 + sub $inp,$a3,$inp + + ldw `0*4`($inp),$X[0] + ldw `1*4`($inp),$X[1] + ldw `2*4`($inp),$t2 + ldw `3*4`($inp),$t3 + ldw `4*4`($inp),$a0 + ldw `5*4`($inp),$a1 + ldw `6*4`($inp),$a2 + ldw `7*4`($inp),$a3 + vshd $X[0],$X[1],$X[0] + vshd $X[1],$t2,$X[1] + stw $X[0],`-$XOFF+0*4`(%sp) + ldw `8*4`($inp),$t0 + vshd $t2,$t3,$t2 + stw $X[1],`-$XOFF+1*4`(%sp) + ldw `9*4`($inp),$t1 + vshd $t3,$a0,$t3 +___ +{ +my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); +for ($i=2;$i<=(128/4-8);$i++) { +$code.=<<___; + stw $t[0],`-$XOFF+$i*4`(%sp) + ldw `(8+$i)*4`($inp),$t[0] + vshd $t[1],$t[2],$t[1] +___ +push(@t,shift(@t)); +} +for (;$i<(128/4-1);$i++) { +$code.=<<___; + stw $t[0],`-$XOFF+$i*4`(%sp) + vshd $t[1],$t[2],$t[1] +___ +push(@t,shift(@t)); +} +$code.=<<___; + b L\$collected_pa1 + stw $t[0],`-$XOFF+$i*4`(%sp) + +___ +} +$code.=<<___; +L\$aligned_pa1 + ldw `0*4`($inp),$X[0] + ldw `1*4`($inp),$X[1] + ldw `2*4`($inp),$t2 + ldw `3*4`($inp),$t3 + ldw `4*4`($inp),$a0 + ldw `5*4`($inp),$a1 + ldw `6*4`($inp),$a2 + ldw `7*4`($inp),$a3 + stw $X[0],`-$XOFF+0*4`(%sp) + ldw `8*4`($inp),$t0 + stw $X[1],`-$XOFF+1*4`(%sp) + ldw `9*4`($inp),$t1 +___ +{ +my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); +for ($i=2;$i<(128/4-8);$i++) { +$code.=<<___; + stw $t[0],`-$XOFF+$i*4`(%sp) + ldw `(8+$i)*4`($inp),$t[0] +___ +push(@t,shift(@t)); +} +for (;$i<128/4;$i++) { +$code.=<<___; + stw $t[0],`-$XOFF+$i*4`(%sp) +___ +push(@t,shift(@t)); +} +$code.="L\$collected_pa1\n"; +} + +for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } +$code.="L\$rounds_pa1\n"; +for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } + +$code.=<<___; + $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments + $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp + $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num + ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl + + ldw `0*4`($ctx),$t1 ; update context + ldw `1*4`($ctx),$t0 + ldw `2*4`($ctx),$t3 + ldw `3*4`($ctx),$t2 + ldw `4*4`($ctx),$a1 + ldw `5*4`($ctx),$a0 + ldw `6*4`($ctx),$a3 + add $t0,$Alo,$Alo + ldw `7*4`($ctx),$a2 + addc $t1,$Ahi,$Ahi + ldw `8*4`($ctx),$t1 + add $t2,$Blo,$Blo + ldw `9*4`($ctx),$t0 + addc $t3,$Bhi,$Bhi + ldw `10*4`($ctx),$t3 + add $a0,$Clo,$Clo + ldw `11*4`($ctx),$t2 + addc $a1,$Chi,$Chi + ldw `12*4`($ctx),$a1 + add $a2,$Dlo,$Dlo + ldw `13*4`($ctx),$a0 + addc $a3,$Dhi,$Dhi + ldw `14*4`($ctx),$a3 + add $t0,$Elo,$Elo + ldw `15*4`($ctx),$a2 + addc $t1,$Ehi,$Ehi + stw $Ahi,`0*4`($ctx) + add $t2,$Flo,$Flo + stw $Alo,`1*4`($ctx) + addc $t3,$Fhi,$Fhi + stw $Bhi,`2*4`($ctx) + add $a0,$Glo,$Glo + stw $Blo,`3*4`($ctx) + addc $a1,$Ghi,$Ghi + stw $Chi,`4*4`($ctx) + add $a2,$Hlo,$Hlo + stw $Clo,`5*4`($ctx) + addc $a3,$Hhi,$Hhi + stw $Dhi,`6*4`($ctx) + ldo `16*$SZ`($inp),$inp ; advance $inp + stw $Dlo,`7*4`($ctx) + stw $Ehi,`8*4`($ctx) + stw $Elo,`9*4`($ctx) + stw $Fhi,`10*4`($ctx) + stw $Flo,`11*4`($ctx) + stw $Ghi,`12*4`($ctx) + stw $Glo,`13*4`($ctx) + stw $Hhi,`14*4`($ctx) + comb,= $inp,$num,L\$done + stw $Hlo,`15*4`($ctx) + b L\$oop_pa1 + $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp +L\$done +___ +}} +$code.=<<___; + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 + $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 + $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 + $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 + $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 + $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 + $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 + $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by " +___ + +# Explicitly encode PA-RISC 2.0 instructions used in this module, so +# that it can be compiled with .LEVEL 1.0. It should be noted that I +# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 +# directive... + +my $ldd = sub { + my ($mod,$args) = @_; + my $orig = "ldd$mod\t$args"; + + if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices + { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1); + $opcode|=(1<<3) if ($mod =~ /^,m/); + $opcode|=(1<<2) if ($mod =~ /^,mb/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $std = sub { + my ($mod,$args) = @_; + my $orig = "std$mod\t$args"; + + if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices + { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $extrd = sub { + my ($mod,$args) = @_; + my $orig = "extrd$mod\t$args"; + + # I only have ",u" completer, it's implicitly encoded... + if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 + { my $opcode=(0x36<<26)|($1<<21)|($4<<16); + my $len=32-$3; + $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos + $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 + { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); + my $len=32-$2; + $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len + $opcode |= (1<<13) if ($mod =~ /,\**=/); + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + else { "\t".$orig; } +}; + +my $shrpd = sub { + my ($mod,$args) = @_; + my $orig = "shrpd$mod\t$args"; + + if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 + { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; + my $cpos=63-$3; + $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa + sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; + } + elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11 + { sprintf "\t.WORD\t0x%08x\t; %s", + (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; + } + else { "\t".$orig; } +}; + +sub assemble { + my ($mnemonic,$mod,$args)=@_; + my $opcode = eval("\$$mnemonic"); + + ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/ + $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32 + : sprintf("shd\t%$1,%$2,%d",$3)/e or + # translate made up instructons: _ror, _shr, _align, _shl + s/_ror(\s+)(%r[0-9]+),/ + ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or + + s/_shr(\s+%r[0-9]+),([0-9]+),/ + $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2) + : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or + + s/_align(\s+%r[0-9]+,%r[0-9]+),/ + ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or + + s/_shl(\s+%r[0-9]+),([0-9]+),/ + $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2) + : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e; + + s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4); + + s/cmpb,\*/comb,/ if ($SIZE_T==4); + + s/\bbv\b/bve/ if ($SIZE_T==8); + + print $_,"\n"; +} + +close STDOUT; diff --git a/app/openssl/crypto/sha/asm/sha512-ppc.pl b/app/openssl/crypto/sha/asm/sha512-ppc.pl index 768a6a6f..6b44a68e 100755 --- a/app/openssl/crypto/sha/asm/sha512-ppc.pl +++ b/app/openssl/crypto/sha/asm/sha512-ppc.pl @@ -40,6 +40,7 @@ $output =shift; if ($flavour =~ /64/) { $SIZE_T=8; + $LRSAVE=2*$SIZE_T; $STU="stdu"; $UCMP="cmpld"; $SHL="sldi"; @@ -47,6 +48,7 @@ if ($flavour =~ /64/) { $PUSH="std"; } elsif ($flavour =~ /32/) { $SIZE_T=4; + $LRSAVE=$SIZE_T; $STU="stwu"; $UCMP="cmplw"; $SHL="slwi"; @@ -87,7 +89,8 @@ if ($output =~ /512/) { $SHR="srwi"; } -$FRAME=32*$SIZE_T; +$FRAME=32*$SIZE_T+16*$SZ; +$LOCALS=6*$SIZE_T; $sp ="r1"; $toc="r2"; @@ -179,13 +182,12 @@ $code=<<___; .globl $func .align 6 $func: + $STU $sp,-$FRAME($sp) mflr r0 - $STU $sp,`-($FRAME+16*$SZ)`($sp) $SHL $num,$num,`log(16*$SZ)/log(2)` $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) @@ -206,6 +208,7 @@ $func: $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) $LD $A,`0*$SZ`($ctx) mr $inp,r4 ; incarnate $inp @@ -217,7 +220,7 @@ $func: $LD $G,`6*$SZ`($ctx) $LD $H,`7*$SZ`($ctx) - b LPICmeup + bl LPICmeup LPICedup: andi. r0,$inp,3 bne Lunaligned @@ -226,40 +229,14 @@ Laligned: $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer bl Lsha2_block_private -Ldone: - $POP r0,`$FRAME-$SIZE_T*21`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,`$FRAME+16*$SZ` - blr -___ + b Ldone -# PowerPC specification allows an implementation to be ill-behaved -# upon unaligned access which crosses page boundary. "Better safe -# than sorry" principle makes me treat it specially. But I don't -# look for particular offending word, but rather for the input -# block which crosses the boundary. Once found that block is aligned -# and hashed separately... -$code.=<<___; +; PowerPC specification allows an implementation to be ill-behaved +; upon unaligned access which crosses page boundary. "Better safe +; than sorry" principle makes me treat it specially. But I don't +; look for particular offending word, but rather for the input +; block which crosses the boundary. Once found that block is aligned +; and hashed separately... .align 4 Lunaligned: subfic $t1,$inp,4096 @@ -278,7 +255,7 @@ Lunaligned: Lcross_page: li $t1,`16*$SZ/4` mtctr $t1 - addi r20,$sp,$FRAME ; aligned spot below the frame + addi r20,$sp,$LOCALS ; aligned spot below the frame Lmemcpy: lbz r16,0($inp) lbz r17,1($inp) @@ -293,8 +270,8 @@ Lmemcpy: bdnz Lmemcpy $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp - addi $t1,$sp,`$FRAME+16*$SZ` ; fictitious end pointer - addi $inp,$sp,$FRAME ; fictitious inp pointer + addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer + addi $inp,$sp,$LOCALS ; fictitious inp pointer $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer @@ -303,10 +280,36 @@ Lmemcpy: $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num addic. $num,$num,`-16*$SZ` ; num-- bne- Lunaligned - b Ldone -___ -$code.=<<___; +Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) + $POP $toc,`$FRAME-$SIZE_T*20`($sp) + $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) + $POP r17,`$FRAME-$SIZE_T*15`($sp) + $POP r18,`$FRAME-$SIZE_T*14`($sp) + $POP r19,`$FRAME-$SIZE_T*13`($sp) + $POP r20,`$FRAME-$SIZE_T*12`($sp) + $POP r21,`$FRAME-$SIZE_T*11`($sp) + $POP r22,`$FRAME-$SIZE_T*10`($sp) + $POP r23,`$FRAME-$SIZE_T*9`($sp) + $POP r24,`$FRAME-$SIZE_T*8`($sp) + $POP r25,`$FRAME-$SIZE_T*7`($sp) + $POP r26,`$FRAME-$SIZE_T*6`($sp) + $POP r27,`$FRAME-$SIZE_T*5`($sp) + $POP r28,`$FRAME-$SIZE_T*4`($sp) + $POP r29,`$FRAME-$SIZE_T*3`($sp) + $POP r30,`$FRAME-$SIZE_T*2`($sp) + $POP r31,`$FRAME-$SIZE_T*1`($sp) + mtlr r0 + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 + .align 4 Lsha2_block_private: ___ @@ -372,6 +375,8 @@ $code.=<<___; $ST $H,`7*$SZ`($ctx) bne Lsha2_block_private blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ___ # Ugly hack here, because PPC assembler syntax seem to vary too @@ -379,22 +384,15 @@ ___ $code.=<<___; .align 6 LPICmeup: - bl LPIC - addi $Tbl,$Tbl,`64-4` ; "distance" between . and last nop - b LPICedup - nop - nop - nop - nop - nop -LPIC: mflr $Tbl + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 blr - nop - nop - nop - nop - nop - nop + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` ___ $code.=<<___ if ($SZ==8); .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd diff --git a/app/openssl/crypto/sha/asm/sha512-s390x.pl b/app/openssl/crypto/sha/asm/sha512-s390x.pl index e7ef2d5a..079a3fc7 100644 --- a/app/openssl/crypto/sha/asm/sha512-s390x.pl +++ b/app/openssl/crypto/sha/asm/sha512-s390x.pl @@ -26,6 +26,26 @@ # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster # than software. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z900 SHA256 was measured to +# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + $t0="%r0"; $t1="%r1"; $ctx="%r2"; $t2="%r2"; @@ -44,7 +64,7 @@ $tbl="%r13"; $T1="%r14"; $sp="%r15"; -$output=shift; +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; if ($output =~ /512/) { @@ -78,7 +98,8 @@ if ($output =~ /512/) { } $Func="sha${label}_block_data_order"; $Table="K${label}"; -$frame=160+16*$SZ; +$stdframe=16*$SIZE_T+4*8; +$frame=$stdframe+16*$SZ; sub BODY_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; @@ -93,9 +114,9 @@ $code.=<<___; xgr $t0,$t1 $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` xgr $t2,$g - $ST $T1,`160+$SZ*($i%16)`($sp) + $ST $T1,`$stdframe+$SZ*($i%16)`($sp) xgr $t0,$t1 # Sigma1(e) - la $T1,0($T1,$h) # T1+=h + algr $T1,$h # T1+=h ngr $t2,$e lgr $t1,$a algr $T1,$t0 # T1+=Sigma1(e) @@ -113,7 +134,7 @@ $code.=<<___; ngr $t2,$b algr $h,$T1 # h+=T1 ogr $t2,$t1 # Maj(a,b,c) - la $d,0($d,$T1) # d+=T1 + algr $d,$T1 # d+=T1 algr $h,$t2 # h+=Maj(a,b,c) ___ } @@ -122,19 +143,19 @@ sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - $LD $T1,`160+$SZ*(($i+1)%16)`($sp) ### $i - $LD $t1,`160+$SZ*(($i+14)%16)`($sp) + $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i + $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp) $ROT $t0,$T1,$sigma0[0] $SHR $T1,$sigma0[2] $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` xgr $T1,$t0 $ROT $t0,$t1,$sigma1[0] - xgr $T1,$t2 # sigma0(X[i+1]) + xgr $T1,$t2 # sigma0(X[i+1]) $SHR $t1,$sigma1[2] - $ADD $T1,`160+$SZ*($i%16)`($sp) # +=X[i] + $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i] xgr $t1,$t0 $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` - $ADD $T1,`160+$SZ*(($i+9)%16)`($sp) # +=X[i+9] + $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9] xgr $t1,$t0 # sigma1(X[i+14]) algr $T1,$t1 # +=sigma1(X[i+14]) ___ @@ -212,6 +233,7 @@ $code.=<<___; .globl $Func .type $Func,\@function $Func: + sllg $len,$len,`log(16*$SZ)/log(2)` ___ $code.=<<___ if ($kimdfunc); larl %r1,OPENSSL_s390xcap_P @@ -219,15 +241,15 @@ $code.=<<___ if ($kimdfunc); tmhl %r0,0x4000 # check for message-security assist jz .Lsoftware lghi %r0,0 - la %r1,16($sp) + la %r1,`2*$SIZE_T`($sp) .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,16($sp) + lg %r0,`2*$SIZE_T`($sp) tmhh %r0,`0x8000>>$kimdfunc` jz .Lsoftware lghi %r0,$kimdfunc lgr %r1,$ctx lgr %r2,$inp - sllg %r3,$len,`log(16*$SZ)/log(2)` + lgr %r3,$len .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 @@ -235,13 +257,12 @@ $code.=<<___ if ($kimdfunc); .Lsoftware: ___ $code.=<<___; - sllg $len,$len,`log(16*$SZ)/log(2)` lghi %r1,-$frame - agr $len,$inp - stmg $ctx,%r15,16($sp) + la $len,0($len,$inp) + stm${g} $ctx,%r15,`2*$SIZE_T`($sp) lgr %r0,$sp la $sp,0(%r1,$sp) - stg %r0,0($sp) + st${g} %r0,0($sp) larl $tbl,$Table $LD $A,`0*$SZ`($ctx) @@ -265,7 +286,7 @@ $code.=<<___; clgr $len,$t0 jne .Lrounds_16_xx - lg $ctx,`$frame+16`($sp) + l${g} $ctx,`$frame+2*$SIZE_T`($sp) la $inp,`16*$SZ`($inp) $ADD $A,`0*$SZ`($ctx) $ADD $B,`1*$SZ`($ctx) @@ -283,14 +304,14 @@ $code.=<<___; $ST $F,`5*$SZ`($ctx) $ST $G,`6*$SZ`($ctx) $ST $H,`7*$SZ`($ctx) - clg $inp,`$frame+32`($sp) + cl${g} $inp,`$frame+4*$SIZE_T`($sp) jne .Lloop - lmg %r6,%r15,`$frame+48`($sp) + lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) br %r14 .size $Func,.-$Func .string "SHA${label} block transform for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,8,8 +.comm OPENSSL_s390xcap_P,16,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/app/openssl/crypto/sha/asm/sha512-sparcv9.pl b/app/openssl/crypto/sha/asm/sha512-sparcv9.pl index ec5d7813..58574078 100644 --- a/app/openssl/crypto/sha/asm/sha512-sparcv9.pl +++ b/app/openssl/crypto/sha/asm/sha512-sparcv9.pl @@ -305,9 +305,9 @@ $code.=<<___; srlx @X[(($i+9)/2)%8],32,$tmp1 ! X[i+9] xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) srl @X[($i/2)%8],0,$tmp0 + add $tmp2,$tmp1,$tmp1 add $xi,$T1,$T1 ! +=X[i] xor $tmp0,@X[($i/2)%8],@X[($i/2)%8] - add $tmp2,$T1,$T1 add $tmp1,$T1,$T1 srl $T1,0,$T1 @@ -318,9 +318,9 @@ ___ $code.=<<___; srlx @X[($i/2)%8],32,$tmp1 ! X[i] xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) - srl @X[($i/2)%8],0,@X[($i/2)%8] add $xi,$T1,$T1 ! +=X[i+9] - add $tmp2,$T1,$T1 + add $tmp2,$tmp1,$tmp1 + srl @X[($i/2)%8],0,@X[($i/2)%8] add $tmp1,$T1,$T1 sllx $T1,32,$tmp0 diff --git a/app/openssl/crypto/sha/asm/sha512-x86_64.S b/app/openssl/crypto/sha/asm/sha512-x86_64.S new file mode 100644 index 00000000..2d3294e0 --- /dev/null +++ b/app/openssl/crypto/sha/asm/sha512-x86_64.S @@ -0,0 +1,1802 @@ +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/app/openssl/crypto/sha/asm/sha512-x86_64.pl b/app/openssl/crypto/sha/asm/sha512-x86_64.pl index e6643f8c..8d516785 100755 --- a/app/openssl/crypto/sha/asm/sha512-x86_64.pl +++ b/app/openssl/crypto/sha/asm/sha512-x86_64.pl @@ -51,7 +51,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; if ($output =~ /512/) { $func="sha512_block_data_order"; @@ -95,50 +96,44 @@ sub ROUND_00_15() { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - mov $e,$a0 - mov $e,$a1 + ror \$`$Sigma1[2]-$Sigma1[1]`,$a0 mov $f,$a2 + mov $T1,`$SZ*($i&0xf)`(%rsp) - ror \$$Sigma1[0],$a0 - ror \$$Sigma1[1],$a1 + ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 + xor $e,$a0 xor $g,$a2 # f^g - xor $a1,$a0 - ror \$`$Sigma1[2]-$Sigma1[1]`,$a1 + ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 + add $h,$T1 # T1+=h + xor $a,$a1 + + add ($Tbl,$round,$SZ),$T1 # T1+=K[round] and $e,$a2 # (f^g)&e - mov $T1,`$SZ*($i&0xf)`(%rsp) + mov $b,$h - xor $a1,$a0 # Sigma1(e) + ror \$`$Sigma0[1]-$Sigma0[0]`,$a1 + xor $e,$a0 xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g - add $h,$T1 # T1+=h - - mov $a,$h - add $a0,$T1 # T1+=Sigma1(e) + xor $c,$h # b^c + xor $a,$a1 add $a2,$T1 # T1+=Ch(e,f,g) - mov $a,$a0 - mov $a,$a1 + mov $b,$a2 - ror \$$Sigma0[0],$h - ror \$$Sigma0[1],$a0 - mov $a,$a2 - add ($Tbl,$round,$SZ),$T1 # T1+=K[round] + ror \$$Sigma1[0],$a0 # Sigma1(e) + and $a,$h # h=(b^c)&a + and $c,$a2 # b&c - xor $a0,$h - ror \$`$Sigma0[2]-$Sigma0[1]`,$a0 - or $c,$a1 # a|c + ror \$$Sigma0[0],$a1 # Sigma0(a) + add $a0,$T1 # T1+=Sigma1(e) + add $a2,$h # h+=b&c (completes +=Maj(a,b,c) - xor $a0,$h # h=Sigma0(a) - and $c,$a2 # a&c add $T1,$d # d+=T1 - - and $b,$a1 # (a|c)&b add $T1,$h # h+=T1 - - or $a2,$a1 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1($round),$round # round++ + add $a1,$h # h+=Sigma0(a) - add $a1,$h # h+=Maj(a,b,c) ___ } @@ -147,32 +142,30 @@ sub ROUND_16_XX() $code.=<<___; mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 - mov `$SZ*(($i+14)&0xf)`(%rsp),$T1 - - mov $a0,$a2 + mov `$SZ*(($i+14)&0xf)`(%rsp),$a1 + mov $a0,$T1 + mov $a1,$a2 + ror \$`$sigma0[1]-$sigma0[0]`,$T1 + xor $a0,$T1 shr \$$sigma0[2],$a0 - ror \$$sigma0[0],$a2 - - xor $a2,$a0 - ror \$`$sigma0[1]-$sigma0[0]`,$a2 - xor $a2,$a0 # sigma0(X[(i+1)&0xf]) - mov $T1,$a1 + ror \$$sigma0[0],$T1 + xor $T1,$a0 # sigma0(X[(i+1)&0xf]) + mov `$SZ*(($i+9)&0xf)`(%rsp),$T1 - shr \$$sigma1[2],$T1 - ror \$$sigma1[0],$a1 - - xor $a1,$T1 - ror \$`$sigma1[1]-$sigma1[0]`,$a1 - - xor $a1,$T1 # sigma1(X[(i+14)&0xf]) + ror \$`$sigma1[1]-$sigma1[0]`,$a2 + xor $a1,$a2 + shr \$$sigma1[2],$a1 + ror \$$sigma1[0],$a2 add $a0,$T1 - - add `$SZ*(($i+9)&0xf)`(%rsp),$T1 + xor $a2,$a1 # sigma1(X[(i+14)&0xf]) add `$SZ*($i&0xf)`(%rsp),$T1 + mov $e,$a0 + add $a1,$T1 + mov $a,$a1 ___ &ROUND_00_15(@_); } @@ -219,6 +212,8 @@ $func: ___ for($i=0;$i<16;$i++) { $code.=" mov $SZ*$i($inp),$T1\n"; + $code.=" mov @ROT[4],$a0\n"; + $code.=" mov @ROT[0],$a1\n"; $code.=" bswap $T1\n"; &ROUND_00_15($i,@ROT); unshift(@ROT,pop(@ROT)); diff --git a/app/openssl/crypto/sha/sha.h b/app/openssl/crypto/sha/sha.h index 16cacf9f..8a6bf4bb 100644 --- a/app/openssl/crypto/sha/sha.h +++ b/app/openssl/crypto/sha/sha.h @@ -106,6 +106,9 @@ typedef struct SHAstate_st } SHA_CTX; #ifndef OPENSSL_NO_SHA0 +#ifdef OPENSSL_FIPS +int private_SHA_Init(SHA_CTX *c); +#endif int SHA_Init(SHA_CTX *c); int SHA_Update(SHA_CTX *c, const void *data, size_t len); int SHA_Final(unsigned char *md, SHA_CTX *c); @@ -113,6 +116,9 @@ unsigned char *SHA(const unsigned char *d, size_t n, unsigned char *md); void SHA_Transform(SHA_CTX *c, const unsigned char *data); #endif #ifndef OPENSSL_NO_SHA1 +#ifdef OPENSSL_FIPS +int private_SHA1_Init(SHA_CTX *c); +#endif int SHA1_Init(SHA_CTX *c); int SHA1_Update(SHA_CTX *c, const void *data, size_t len); int SHA1_Final(unsigned char *md, SHA_CTX *c); @@ -135,6 +141,10 @@ typedef struct SHA256state_st } SHA256_CTX; #ifndef OPENSSL_NO_SHA256 +#ifdef OPENSSL_FIPS +int private_SHA224_Init(SHA256_CTX *c); +int private_SHA256_Init(SHA256_CTX *c); +#endif int SHA224_Init(SHA256_CTX *c); int SHA224_Update(SHA256_CTX *c, const void *data, size_t len); int SHA224_Final(unsigned char *md, SHA256_CTX *c); @@ -182,6 +192,10 @@ typedef struct SHA512state_st #endif #ifndef OPENSSL_NO_SHA512 +#ifdef OPENSSL_FIPS +int private_SHA384_Init(SHA512_CTX *c); +int private_SHA512_Init(SHA512_CTX *c); +#endif int SHA384_Init(SHA512_CTX *c); int SHA384_Update(SHA512_CTX *c, const void *data, size_t len); int SHA384_Final(unsigned char *md, SHA512_CTX *c); diff --git a/app/openssl/crypto/sha/sha1_one.c b/app/openssl/crypto/sha/sha1_one.c index 7c65b602..c56ec940 100644 --- a/app/openssl/crypto/sha/sha1_one.c +++ b/app/openssl/crypto/sha/sha1_one.c @@ -58,8 +58,8 @@ #include #include -#include #include +#include #ifndef OPENSSL_NO_SHA1 unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md) diff --git a/app/openssl/crypto/sha/sha1dgst.c b/app/openssl/crypto/sha/sha1dgst.c index 50d1925c..a9869022 100644 --- a/app/openssl/crypto/sha/sha1dgst.c +++ b/app/openssl/crypto/sha/sha1dgst.c @@ -56,6 +56,7 @@ * [including the GNU Public Licence.] */ +#include #include #if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA) diff --git a/app/openssl/crypto/sha/sha256.c b/app/openssl/crypto/sha/sha256.c index 8952d876..4eae0748 100644 --- a/app/openssl/crypto/sha/sha256.c +++ b/app/openssl/crypto/sha/sha256.c @@ -16,7 +16,7 @@ const char SHA256_version[]="SHA-256" OPENSSL_VERSION_PTEXT; -int SHA224_Init (SHA256_CTX *c) +fips_md_init_ctx(SHA224, SHA256) { memset (c,0,sizeof(*c)); c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL; @@ -27,7 +27,7 @@ int SHA224_Init (SHA256_CTX *c) return 1; } -int SHA256_Init (SHA256_CTX *c) +fips_md_init(SHA256) { memset (c,0,sizeof(*c)); c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL; @@ -88,17 +88,17 @@ int SHA224_Final (unsigned char *md, SHA256_CTX *c) switch ((c)->md_len) \ { case SHA224_DIGEST_LENGTH: \ for (nn=0;nnh[nn]; HOST_l2c(ll,(s)); } \ + { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ case SHA256_DIGEST_LENGTH: \ for (nn=0;nnh[nn]; HOST_l2c(ll,(s)); } \ + { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ default: \ if ((c)->md_len > SHA256_DIGEST_LENGTH) \ return 0; \ for (nn=0;nn<(c)->md_len/4;nn++) \ - { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \ + { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ } \ } while (0) diff --git a/app/openssl/crypto/sha/sha512.c b/app/openssl/crypto/sha/sha512.c index cbc0e58c..50c229dd 100644 --- a/app/openssl/crypto/sha/sha512.c +++ b/app/openssl/crypto/sha/sha512.c @@ -59,21 +59,8 @@ const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT; #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA #endif -int SHA384_Init (SHA512_CTX *c) +fips_md_init_ctx(SHA384, SHA512) { -#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm)) - /* maintain dword order required by assembler module */ - unsigned int *h = (unsigned int *)c->h; - - h[0] = 0xcbbb9d5d; h[1] = 0xc1059ed8; - h[2] = 0x629a292a; h[3] = 0x367cd507; - h[4] = 0x9159015a; h[5] = 0x3070dd17; - h[6] = 0x152fecd8; h[7] = 0xf70e5939; - h[8] = 0x67332667; h[9] = 0xffc00b31; - h[10] = 0x8eb44a87; h[11] = 0x68581511; - h[12] = 0xdb0c2e0d; h[13] = 0x64f98fa7; - h[14] = 0x47b5481d; h[15] = 0xbefa4fa4; -#else c->h[0]=U64(0xcbbb9d5dc1059ed8); c->h[1]=U64(0x629a292a367cd507); c->h[2]=U64(0x9159015a3070dd17); @@ -82,27 +69,14 @@ int SHA384_Init (SHA512_CTX *c) c->h[5]=U64(0x8eb44a8768581511); c->h[6]=U64(0xdb0c2e0d64f98fa7); c->h[7]=U64(0x47b5481dbefa4fa4); -#endif + c->Nl=0; c->Nh=0; c->num=0; c->md_len=SHA384_DIGEST_LENGTH; return 1; } -int SHA512_Init (SHA512_CTX *c) +fips_md_init(SHA512) { -#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm)) - /* maintain dword order required by assembler module */ - unsigned int *h = (unsigned int *)c->h; - - h[0] = 0x6a09e667; h[1] = 0xf3bcc908; - h[2] = 0xbb67ae85; h[3] = 0x84caa73b; - h[4] = 0x3c6ef372; h[5] = 0xfe94f82b; - h[6] = 0xa54ff53a; h[7] = 0x5f1d36f1; - h[8] = 0x510e527f; h[9] = 0xade682d1; - h[10] = 0x9b05688c; h[11] = 0x2b3e6c1f; - h[12] = 0x1f83d9ab; h[13] = 0xfb41bd6b; - h[14] = 0x5be0cd19; h[15] = 0x137e2179; -#else c->h[0]=U64(0x6a09e667f3bcc908); c->h[1]=U64(0xbb67ae8584caa73b); c->h[2]=U64(0x3c6ef372fe94f82b); @@ -111,7 +85,7 @@ int SHA512_Init (SHA512_CTX *c) c->h[5]=U64(0x9b05688c2b3e6c1f); c->h[6]=U64(0x1f83d9abfb41bd6b); c->h[7]=U64(0x5be0cd19137e2179); -#endif + c->Nl=0; c->Nh=0; c->num=0; c->md_len=SHA512_DIGEST_LENGTH; return 1; @@ -160,24 +134,6 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c) if (md==0) return 0; -#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm)) - /* recall assembler dword order... */ - n = c->md_len; - if (n == SHA384_DIGEST_LENGTH || n == SHA512_DIGEST_LENGTH) - { - unsigned int *h = (unsigned int *)c->h, t; - - for (n/=4;n;n--) - { - t = *(h++); - *(md++) = (unsigned char)(t>>24); - *(md++) = (unsigned char)(t>>16); - *(md++) = (unsigned char)(t>>8); - *(md++) = (unsigned char)(t); - } - } - else return 0; -#else switch (c->md_len) { /* Let compiler decide if it's appropriate to unroll... */ @@ -214,7 +170,7 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c) /* ... as well as make sure md_len is not abused. */ default: return 0; } -#endif + return 1; } @@ -276,7 +232,14 @@ int SHA384_Update (SHA512_CTX *c, const void *data, size_t len) { return SHA512_Update (c,data,len); } void SHA512_Transform (SHA512_CTX *c, const unsigned char *data) -{ sha512_block_data_order (c,data,1); } + { +#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA + if ((size_t)data%sizeof(c->u.d[0]) != 0) + memcpy(c->u.p,data,sizeof(c->u.p)), + data = c->u.p; +#endif + sha512_block_data_order (c,data,1); + } unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md) { diff --git a/app/openssl/crypto/sha/sha_dgst.c b/app/openssl/crypto/sha/sha_dgst.c index 70eb5603..fb63b17f 100644 --- a/app/openssl/crypto/sha/sha_dgst.c +++ b/app/openssl/crypto/sha/sha_dgst.c @@ -56,6 +56,7 @@ * [including the GNU Public Licence.] */ +#include #include #if !defined(OPENSSL_NO_SHA0) && !defined(OPENSSL_NO_SHA) diff --git a/app/openssl/crypto/sha/sha_locl.h b/app/openssl/crypto/sha/sha_locl.h index 672c26ee..d673255f 100644 --- a/app/openssl/crypto/sha/sha_locl.h +++ b/app/openssl/crypto/sha/sha_locl.h @@ -69,11 +69,11 @@ #define HASH_CBLOCK SHA_CBLOCK #define HASH_MAKE_STRING(c,s) do { \ unsigned long ll; \ - ll=(c)->h0; HOST_l2c(ll,(s)); \ - ll=(c)->h1; HOST_l2c(ll,(s)); \ - ll=(c)->h2; HOST_l2c(ll,(s)); \ - ll=(c)->h3; HOST_l2c(ll,(s)); \ - ll=(c)->h4; HOST_l2c(ll,(s)); \ + ll=(c)->h0; (void)HOST_l2c(ll,(s)); \ + ll=(c)->h1; (void)HOST_l2c(ll,(s)); \ + ll=(c)->h2; (void)HOST_l2c(ll,(s)); \ + ll=(c)->h3; (void)HOST_l2c(ll,(s)); \ + ll=(c)->h4; (void)HOST_l2c(ll,(s)); \ } while (0) #if defined(SHA_0) @@ -122,7 +122,11 @@ void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num); #define INIT_DATA_h3 0x10325476UL #define INIT_DATA_h4 0xc3d2e1f0UL -int HASH_INIT (SHA_CTX *c) +#ifdef SHA_0 +fips_md_init(SHA) +#else +fips_md_init_ctx(SHA1, SHA) +#endif { memset (c,0,sizeof(*c)); c->h0=INIT_DATA_h0; @@ -252,21 +256,21 @@ static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num) } else { - HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; - BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l; - BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l; - BODY_00_15( 2,E,T,A,B,C,D,X( 2)); HOST_c2l(data,l); X( 4)=l; - BODY_00_15( 3,D,E,T,A,B,C,X( 3)); HOST_c2l(data,l); X( 5)=l; - BODY_00_15( 4,C,D,E,T,A,B,X( 4)); HOST_c2l(data,l); X( 6)=l; - BODY_00_15( 5,B,C,D,E,T,A,X( 5)); HOST_c2l(data,l); X( 7)=l; - BODY_00_15( 6,A,B,C,D,E,T,X( 6)); HOST_c2l(data,l); X( 8)=l; - BODY_00_15( 7,T,A,B,C,D,E,X( 7)); HOST_c2l(data,l); X( 9)=l; - BODY_00_15( 8,E,T,A,B,C,D,X( 8)); HOST_c2l(data,l); X(10)=l; - BODY_00_15( 9,D,E,T,A,B,C,X( 9)); HOST_c2l(data,l); X(11)=l; - BODY_00_15(10,C,D,E,T,A,B,X(10)); HOST_c2l(data,l); X(12)=l; - BODY_00_15(11,B,C,D,E,T,A,X(11)); HOST_c2l(data,l); X(13)=l; - BODY_00_15(12,A,B,C,D,E,T,X(12)); HOST_c2l(data,l); X(14)=l; - BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l; + (void)HOST_c2l(data,l); X( 0)=l; (void)HOST_c2l(data,l); X( 1)=l; + BODY_00_15( 0,A,B,C,D,E,T,X( 0)); (void)HOST_c2l(data,l); X( 2)=l; + BODY_00_15( 1,T,A,B,C,D,E,X( 1)); (void)HOST_c2l(data,l); X( 3)=l; + BODY_00_15( 2,E,T,A,B,C,D,X( 2)); (void)HOST_c2l(data,l); X( 4)=l; + BODY_00_15( 3,D,E,T,A,B,C,X( 3)); (void)HOST_c2l(data,l); X( 5)=l; + BODY_00_15( 4,C,D,E,T,A,B,X( 4)); (void)HOST_c2l(data,l); X( 6)=l; + BODY_00_15( 5,B,C,D,E,T,A,X( 5)); (void)HOST_c2l(data,l); X( 7)=l; + BODY_00_15( 6,A,B,C,D,E,T,X( 6)); (void)HOST_c2l(data,l); X( 8)=l; + BODY_00_15( 7,T,A,B,C,D,E,X( 7)); (void)HOST_c2l(data,l); X( 9)=l; + BODY_00_15( 8,E,T,A,B,C,D,X( 8)); (void)HOST_c2l(data,l); X(10)=l; + BODY_00_15( 9,D,E,T,A,B,C,X( 9)); (void)HOST_c2l(data,l); X(11)=l; + BODY_00_15(10,C,D,E,T,A,B,X(10)); (void)HOST_c2l(data,l); X(12)=l; + BODY_00_15(11,B,C,D,E,T,A,X(11)); (void)HOST_c2l(data,l); X(13)=l; + BODY_00_15(12,A,B,C,D,E,T,X(12)); (void)HOST_c2l(data,l); X(14)=l; + BODY_00_15(13,T,A,B,C,D,E,X(13)); (void)HOST_c2l(data,l); X(15)=l; BODY_00_15(14,E,T,A,B,C,D,X(14)); BODY_00_15(15,D,E,T,A,B,C,X(15)); } diff --git a/app/openssl/crypto/sparccpuid.S b/app/openssl/crypto/sparccpuid.S index ae61f7f5..c63d5da4 100644 --- a/app/openssl/crypto/sparccpuid.S +++ b/app/openssl/crypto/sparccpuid.S @@ -123,7 +123,7 @@ OPENSSL_wipe_cpu: fmovs %f1,%f3 fmovs %f0,%f2 - add %fp,BIAS,%i0 ! return pointer to caller´s top of stack + add %fp,BIAS,%i0 ! return pointer to caller´s top of stack ret restore @@ -235,10 +235,10 @@ _sparcv9_rdtick: .global _sparcv9_vis1_probe .align 8 _sparcv9_vis1_probe: - .word 0x81b00d80 !fxor %f0,%f0,%f0 add %sp,BIAS+2,%o1 - retl .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 + retl + .word 0x81b00d80 !fxor %f0,%f0,%f0 .type _sparcv9_vis1_probe,#function .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe diff --git a/app/openssl/crypto/sparcv9cap.c b/app/openssl/crypto/sparcv9cap.c index ed195ab4..43b3ac6f 100644 --- a/app/openssl/crypto/sparcv9cap.c +++ b/app/openssl/crypto/sparcv9cap.c @@ -19,7 +19,8 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); - if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == + if (num>=8 && !(num&1) && + (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else @@ -169,7 +170,6 @@ void OPENSSL_cpuid_setup(void) char *e; struct sigaction common_act,ill_oact,bus_oact; sigset_t all_masked,oset; - int sig; static int trigger=0; if (trigger) return; diff --git a/app/openssl/crypto/srp/srp.h b/app/openssl/crypto/srp/srp.h new file mode 100644 index 00000000..7ec7825c --- /dev/null +++ b/app/openssl/crypto/srp/srp.h @@ -0,0 +1,172 @@ +/* crypto/srp/srp.h */ +/* Written by Christophe Renou (christophe.renou@edelweb.fr) with + * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#ifndef __SRP_H__ +#define __SRP_H__ + +#ifndef OPENSSL_NO_SRP + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +typedef struct SRP_gN_cache_st + { + char *b64_bn; + BIGNUM *bn; + } SRP_gN_cache; + + +DECLARE_STACK_OF(SRP_gN_cache) + +typedef struct SRP_user_pwd_st + { + char *id; + BIGNUM *s; + BIGNUM *v; + const BIGNUM *g; + const BIGNUM *N; + char *info; + } SRP_user_pwd; + +DECLARE_STACK_OF(SRP_user_pwd) + +typedef struct SRP_VBASE_st + { + STACK_OF(SRP_user_pwd) *users_pwd; + STACK_OF(SRP_gN_cache) *gN_cache; +/* to simulate a user */ + char *seed_key; + BIGNUM *default_g; + BIGNUM *default_N; + } SRP_VBASE; + + +/*Structure interne pour retenir les couples N et g*/ +typedef struct SRP_gN_st + { + char *id; + BIGNUM *g; + BIGNUM *N; + } SRP_gN; + +DECLARE_STACK_OF(SRP_gN) + +SRP_VBASE *SRP_VBASE_new(char *seed_key); +int SRP_VBASE_free(SRP_VBASE *vb); +int SRP_VBASE_init(SRP_VBASE *vb, char * verifier_file); +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g); +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g); + + +#define SRP_NO_ERROR 0 +#define SRP_ERR_VBASE_INCOMPLETE_FILE 1 +#define SRP_ERR_VBASE_BN_LIB 2 +#define SRP_ERR_OPEN_FILE 3 +#define SRP_ERR_MEMORY 4 + +#define DB_srptype 0 +#define DB_srpverifier 1 +#define DB_srpsalt 2 +#define DB_srpid 3 +#define DB_srpgN 4 +#define DB_srpinfo 5 +#undef DB_NUMBER +#define DB_NUMBER 6 + +#define DB_SRP_INDEX 'I' +#define DB_SRP_VALID 'V' +#define DB_SRP_REVOKED 'R' +#define DB_SRP_MODIF 'v' + + +/* see srp.c */ +char * SRP_check_known_gN_param(BIGNUM* g, BIGNUM* N); +SRP_gN *SRP_get_default_gN(const char * id) ; + +/* server side .... */ +BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N); +BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v); +int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N); +BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N) ; + + + +/* client side .... */ +BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass); +BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g); +BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u); +int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N); + +#define SRP_MINIMAL_N 1024 + +#ifdef __cplusplus +} +#endif + +#endif +#endif diff --git a/app/openssl/crypto/srp/srp_grps.h b/app/openssl/crypto/srp/srp_grps.h new file mode 100644 index 00000000..8e3c35e3 --- /dev/null +++ b/app/openssl/crypto/srp/srp_grps.h @@ -0,0 +1,517 @@ +/* start of generated data */ + +static BN_ULONG bn_group_1024_value[] = { + bn_pack4(0x9FC6,0x1D2F,0xC0EB,0x06E3), + bn_pack4(0xFD51,0x38FE,0x8376,0x435B), + bn_pack4(0x2FD4,0xCBF4,0x976E,0xAA9A), + bn_pack4(0x68ED,0xBC3C,0x0572,0x6CC0), + bn_pack4(0xC529,0xF566,0x660E,0x57EC), + bn_pack4(0x8255,0x9B29,0x7BCF,0x1885), + bn_pack4(0xCE8E,0xF4AD,0x69B1,0x5D49), + bn_pack4(0x5DC7,0xD7B4,0x6154,0xD6B6), + bn_pack4(0x8E49,0x5C1D,0x6089,0xDAD1), + bn_pack4(0xE0D5,0xD8E2,0x50B9,0x8BE4), + bn_pack4(0x383B,0x4813,0xD692,0xC6E0), + bn_pack4(0xD674,0xDF74,0x96EA,0x81D3), + bn_pack4(0x9EA2,0x314C,0x9C25,0x6576), + bn_pack4(0x6072,0x6187,0x75FF,0x3C0B), + bn_pack4(0x9C33,0xF80A,0xFA8F,0xC5E8), + bn_pack4(0xEEAF,0x0AB9,0xADB3,0x8DD6) +}; +static BIGNUM bn_group_1024 = { + bn_group_1024_value, + (sizeof bn_group_1024_value)/sizeof(BN_ULONG), + (sizeof bn_group_1024_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_1536_value[] = { + bn_pack4(0xCF76,0xE3FE,0xD135,0xF9BB), + bn_pack4(0x1518,0x0F93,0x499A,0x234D), + bn_pack4(0x8CE7,0xA28C,0x2442,0xC6F3), + bn_pack4(0x5A02,0x1FFF,0x5E91,0x479E), + bn_pack4(0x7F8A,0x2FE9,0xB8B5,0x292E), + bn_pack4(0x837C,0x264A,0xE3A9,0xBEB8), + bn_pack4(0xE442,0x734A,0xF7CC,0xB7AE), + bn_pack4(0x6577,0x2E43,0x7D6C,0x7F8C), + bn_pack4(0xDB2F,0xD53D,0x24B7,0xC486), + bn_pack4(0x6EDF,0x0195,0x3934,0x9627), + bn_pack4(0x158B,0xFD3E,0x2B9C,0x8CF5), + bn_pack4(0x764E,0x3F4B,0x53DD,0x9DA1), + bn_pack4(0x4754,0x8381,0xDBC5,0xB1FC), + bn_pack4(0x9B60,0x9E0B,0xE3BA,0xB63D), + bn_pack4(0x8134,0xB1C8,0xB979,0x8914), + bn_pack4(0xDF02,0x8A7C,0xEC67,0xF0D0), + bn_pack4(0x80B6,0x55BB,0x9A22,0xE8DC), + bn_pack4(0x1558,0x903B,0xA0D0,0xF843), + bn_pack4(0x51C6,0xA94B,0xE460,0x7A29), + bn_pack4(0x5F4F,0x5F55,0x6E27,0xCBDE), + bn_pack4(0xBEEE,0xA961,0x4B19,0xCC4D), + bn_pack4(0xDBA5,0x1DF4,0x99AC,0x4C80), + bn_pack4(0xB1F1,0x2A86,0x17A4,0x7BBB), + bn_pack4(0x9DEF,0x3CAF,0xB939,0x277A) +}; +static BIGNUM bn_group_1536 = { + bn_group_1536_value, + (sizeof bn_group_1536_value)/sizeof(BN_ULONG), + (sizeof bn_group_1536_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_2048_value[] = { + bn_pack4(0x0FA7,0x111F,0x9E4A,0xFF73), + bn_pack4(0x9B65,0xE372,0xFCD6,0x8EF2), + bn_pack4(0x35DE,0x236D,0x525F,0x5475), + bn_pack4(0x94B5,0xC803,0xD89F,0x7AE4), + bn_pack4(0x71AE,0x35F8,0xE9DB,0xFBB6), + bn_pack4(0x2A56,0x98F3,0xA8D0,0xC382), + bn_pack4(0x9CCC,0x041C,0x7BC3,0x08D8), + bn_pack4(0xAF87,0x4E73,0x03CE,0x5329), + bn_pack4(0x6160,0x2790,0x04E5,0x7AE6), + bn_pack4(0x032C,0xFBDB,0xF52F,0xB378), + bn_pack4(0x5EA7,0x7A27,0x75D2,0xECFA), + bn_pack4(0x5445,0x23B5,0x24B0,0xD57D), + bn_pack4(0x5B9D,0x32E6,0x88F8,0x7748), + bn_pack4(0xF1D2,0xB907,0x8717,0x461A), + bn_pack4(0x76BD,0x207A,0x436C,0x6481), + bn_pack4(0xCA97,0xB43A,0x23FB,0x8016), + bn_pack4(0x1D28,0x1E44,0x6B14,0x773B), + bn_pack4(0x7359,0xD041,0xD5C3,0x3EA7), + bn_pack4(0xA80D,0x740A,0xDBF4,0xFF74), + bn_pack4(0x55F9,0x7993,0xEC97,0x5EEA), + bn_pack4(0x2918,0xA996,0x2F0B,0x93B8), + bn_pack4(0x661A,0x05FB,0xD5FA,0xAAE8), + bn_pack4(0xCF60,0x9517,0x9A16,0x3AB3), + bn_pack4(0xE808,0x3969,0xEDB7,0x67B0), + bn_pack4(0xCD7F,0x48A9,0xDA04,0xFD50), + bn_pack4(0xD523,0x12AB,0x4B03,0x310D), + bn_pack4(0x8193,0xE075,0x7767,0xA13D), + bn_pack4(0xA373,0x29CB,0xB4A0,0x99ED), + bn_pack4(0xFC31,0x9294,0x3DB5,0x6050), + bn_pack4(0xAF72,0xB665,0x1987,0xEE07), + bn_pack4(0xF166,0xDE5E,0x1389,0x582F), + bn_pack4(0xAC6B,0xDB41,0x324A,0x9A9B) +}; +static BIGNUM bn_group_2048 = { + bn_group_2048_value, + (sizeof bn_group_2048_value)/sizeof(BN_ULONG), + (sizeof bn_group_2048_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_3072_value[] = { + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF), + bn_pack4(0x4B82,0xD120,0xA93A,0xD2CA), + bn_pack4(0x43DB,0x5BFC,0xE0FD,0x108E), + bn_pack4(0x08E2,0x4FA0,0x74E5,0xAB31), + bn_pack4(0x7709,0x88C0,0xBAD9,0x46E2), + bn_pack4(0xBBE1,0x1757,0x7A61,0x5D6C), + bn_pack4(0x521F,0x2B18,0x177B,0x200C), + bn_pack4(0xD876,0x0273,0x3EC8,0x6A64), + bn_pack4(0xF12F,0xFA06,0xD98A,0x0864), + bn_pack4(0xCEE3,0xD226,0x1AD2,0xEE6B), + bn_pack4(0x1E8C,0x94E0,0x4A25,0x619D), + bn_pack4(0xABF5,0xAE8C,0xDB09,0x33D7), + bn_pack4(0xB397,0x0F85,0xA6E1,0xE4C7), + bn_pack4(0x8AEA,0x7157,0x5D06,0x0C7D), + bn_pack4(0xECFB,0x8504,0x58DB,0xEF0A), + bn_pack4(0xA855,0x21AB,0xDF1C,0xBA64), + bn_pack4(0xAD33,0x170D,0x0450,0x7A33), + bn_pack4(0x1572,0x8E5A,0x8AAA,0xC42D), + bn_pack4(0x15D2,0x2618,0x98FA,0x0510), + bn_pack4(0x3995,0x497C,0xEA95,0x6AE5), + bn_pack4(0xDE2B,0xCBF6,0x9558,0x1718), + bn_pack4(0xB5C5,0x5DF0,0x6F4C,0x52C9), + bn_pack4(0x9B27,0x83A2,0xEC07,0xA28F), + bn_pack4(0xE39E,0x772C,0x180E,0x8603), + bn_pack4(0x3290,0x5E46,0x2E36,0xCE3B), + bn_pack4(0xF174,0x6C08,0xCA18,0x217C), + bn_pack4(0x670C,0x354E,0x4ABC,0x9804), + bn_pack4(0x9ED5,0x2907,0x7096,0x966D), + bn_pack4(0x1C62,0xF356,0x2085,0x52BB), + bn_pack4(0x8365,0x5D23,0xDCA3,0xAD96), + bn_pack4(0x6916,0x3FA8,0xFD24,0xCF5F), + bn_pack4(0x98DA,0x4836,0x1C55,0xD39A), + bn_pack4(0xC200,0x7CB8,0xA163,0xBF05), + bn_pack4(0x4928,0x6651,0xECE4,0x5B3D), + bn_pack4(0xAE9F,0x2411,0x7C4B,0x1FE6), + bn_pack4(0xEE38,0x6BFB,0x5A89,0x9FA5), + bn_pack4(0x0BFF,0x5CB6,0xF406,0xB7ED), + bn_pack4(0xF44C,0x42E9,0xA637,0xED6B), + bn_pack4(0xE485,0xB576,0x625E,0x7EC6), + bn_pack4(0x4FE1,0x356D,0x6D51,0xC245), + bn_pack4(0x302B,0x0A6D,0xF25F,0x1437), + bn_pack4(0xEF95,0x19B3,0xCD3A,0x431B), + bn_pack4(0x514A,0x0879,0x8E34,0x04DD), + bn_pack4(0x020B,0xBEA6,0x3B13,0x9B22), + bn_pack4(0x2902,0x4E08,0x8A67,0xCC74), + bn_pack4(0xC4C6,0x628B,0x80DC,0x1CD1), + bn_pack4(0xC90F,0xDAA2,0x2168,0xC234), + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF) +}; +static BIGNUM bn_group_3072 = { + bn_group_3072_value, + (sizeof bn_group_3072_value)/sizeof(BN_ULONG), + (sizeof bn_group_3072_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_4096_value[] = { + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF), + bn_pack4(0x4DF4,0x35C9,0x3406,0x3199), + bn_pack4(0x86FF,0xB7DC,0x90A6,0xC08F), + bn_pack4(0x93B4,0xEA98,0x8D8F,0xDDC1), + bn_pack4(0xD006,0x9127,0xD5B0,0x5AA9), + bn_pack4(0xB81B,0xDD76,0x2170,0x481C), + bn_pack4(0x1F61,0x2970,0xCEE2,0xD7AF), + bn_pack4(0x233B,0xA186,0x515B,0xE7ED), + bn_pack4(0x99B2,0x964F,0xA090,0xC3A2), + bn_pack4(0x287C,0x5947,0x4E6B,0xC05D), + bn_pack4(0x2E8E,0xFC14,0x1FBE,0xCAA6), + bn_pack4(0xDBBB,0xC2DB,0x04DE,0x8EF9), + bn_pack4(0x2583,0xE9CA,0x2AD4,0x4CE8), + bn_pack4(0x1A94,0x6834,0xB615,0x0BDA), + bn_pack4(0x99C3,0x2718,0x6AF4,0xE23C), + bn_pack4(0x8871,0x9A10,0xBDBA,0x5B26), + bn_pack4(0x1A72,0x3C12,0xA787,0xE6D7), + bn_pack4(0x4B82,0xD120,0xA921,0x0801), + bn_pack4(0x43DB,0x5BFC,0xE0FD,0x108E), + bn_pack4(0x08E2,0x4FA0,0x74E5,0xAB31), + bn_pack4(0x7709,0x88C0,0xBAD9,0x46E2), + bn_pack4(0xBBE1,0x1757,0x7A61,0x5D6C), + bn_pack4(0x521F,0x2B18,0x177B,0x200C), + bn_pack4(0xD876,0x0273,0x3EC8,0x6A64), + bn_pack4(0xF12F,0xFA06,0xD98A,0x0864), + bn_pack4(0xCEE3,0xD226,0x1AD2,0xEE6B), + bn_pack4(0x1E8C,0x94E0,0x4A25,0x619D), + bn_pack4(0xABF5,0xAE8C,0xDB09,0x33D7), + bn_pack4(0xB397,0x0F85,0xA6E1,0xE4C7), + bn_pack4(0x8AEA,0x7157,0x5D06,0x0C7D), + bn_pack4(0xECFB,0x8504,0x58DB,0xEF0A), + bn_pack4(0xA855,0x21AB,0xDF1C,0xBA64), + bn_pack4(0xAD33,0x170D,0x0450,0x7A33), + bn_pack4(0x1572,0x8E5A,0x8AAA,0xC42D), + bn_pack4(0x15D2,0x2618,0x98FA,0x0510), + bn_pack4(0x3995,0x497C,0xEA95,0x6AE5), + bn_pack4(0xDE2B,0xCBF6,0x9558,0x1718), + bn_pack4(0xB5C5,0x5DF0,0x6F4C,0x52C9), + bn_pack4(0x9B27,0x83A2,0xEC07,0xA28F), + bn_pack4(0xE39E,0x772C,0x180E,0x8603), + bn_pack4(0x3290,0x5E46,0x2E36,0xCE3B), + bn_pack4(0xF174,0x6C08,0xCA18,0x217C), + bn_pack4(0x670C,0x354E,0x4ABC,0x9804), + bn_pack4(0x9ED5,0x2907,0x7096,0x966D), + bn_pack4(0x1C62,0xF356,0x2085,0x52BB), + bn_pack4(0x8365,0x5D23,0xDCA3,0xAD96), + bn_pack4(0x6916,0x3FA8,0xFD24,0xCF5F), + bn_pack4(0x98DA,0x4836,0x1C55,0xD39A), + bn_pack4(0xC200,0x7CB8,0xA163,0xBF05), + bn_pack4(0x4928,0x6651,0xECE4,0x5B3D), + bn_pack4(0xAE9F,0x2411,0x7C4B,0x1FE6), + bn_pack4(0xEE38,0x6BFB,0x5A89,0x9FA5), + bn_pack4(0x0BFF,0x5CB6,0xF406,0xB7ED), + bn_pack4(0xF44C,0x42E9,0xA637,0xED6B), + bn_pack4(0xE485,0xB576,0x625E,0x7EC6), + bn_pack4(0x4FE1,0x356D,0x6D51,0xC245), + bn_pack4(0x302B,0x0A6D,0xF25F,0x1437), + bn_pack4(0xEF95,0x19B3,0xCD3A,0x431B), + bn_pack4(0x514A,0x0879,0x8E34,0x04DD), + bn_pack4(0x020B,0xBEA6,0x3B13,0x9B22), + bn_pack4(0x2902,0x4E08,0x8A67,0xCC74), + bn_pack4(0xC4C6,0x628B,0x80DC,0x1CD1), + bn_pack4(0xC90F,0xDAA2,0x2168,0xC234), + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF) +}; +static BIGNUM bn_group_4096 = { + bn_group_4096_value, + (sizeof bn_group_4096_value)/sizeof(BN_ULONG), + (sizeof bn_group_4096_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_6144_value[] = { + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF), + bn_pack4(0xE694,0xF91E,0x6DCC,0x4024), + bn_pack4(0x12BF,0x2D5B,0x0B74,0x74D6), + bn_pack4(0x043E,0x8F66,0x3F48,0x60EE), + bn_pack4(0x387F,0xE8D7,0x6E3C,0x0468), + bn_pack4(0xDA56,0xC9EC,0x2EF2,0x9632), + bn_pack4(0xEB19,0xCCB1,0xA313,0xD55C), + bn_pack4(0xF550,0xAA3D,0x8A1F,0xBFF0), + bn_pack4(0x06A1,0xD58B,0xB7C5,0xDA76), + bn_pack4(0xA797,0x15EE,0xF29B,0xE328), + bn_pack4(0x14CC,0x5ED2,0x0F80,0x37E0), + bn_pack4(0xCC8F,0x6D7E,0xBF48,0xE1D8), + bn_pack4(0x4BD4,0x07B2,0x2B41,0x54AA), + bn_pack4(0x0F1D,0x45B7,0xFF58,0x5AC5), + bn_pack4(0x23A9,0x7A7E,0x36CC,0x88BE), + bn_pack4(0x59E7,0xC97F,0xBEC7,0xE8F3), + bn_pack4(0xB5A8,0x4031,0x900B,0x1C9E), + bn_pack4(0xD55E,0x702F,0x4698,0x0C82), + bn_pack4(0xF482,0xD7CE,0x6E74,0xFEF6), + bn_pack4(0xF032,0xEA15,0xD172,0x1D03), + bn_pack4(0x5983,0xCA01,0xC64B,0x92EC), + bn_pack4(0x6FB8,0xF401,0x378C,0xD2BF), + bn_pack4(0x3320,0x5151,0x2BD7,0xAF42), + bn_pack4(0xDB7F,0x1447,0xE6CC,0x254B), + bn_pack4(0x44CE,0x6CBA,0xCED4,0xBB1B), + bn_pack4(0xDA3E,0xDBEB,0xCF9B,0x14ED), + bn_pack4(0x1797,0x27B0,0x865A,0x8918), + bn_pack4(0xB06A,0x53ED,0x9027,0xD831), + bn_pack4(0xE5DB,0x382F,0x4130,0x01AE), + bn_pack4(0xF8FF,0x9406,0xAD9E,0x530E), + bn_pack4(0xC975,0x1E76,0x3DBA,0x37BD), + bn_pack4(0xC1D4,0xDCB2,0x6026,0x46DE), + bn_pack4(0x36C3,0xFAB4,0xD27C,0x7026), + bn_pack4(0x4DF4,0x35C9,0x3402,0x8492), + bn_pack4(0x86FF,0xB7DC,0x90A6,0xC08F), + bn_pack4(0x93B4,0xEA98,0x8D8F,0xDDC1), + bn_pack4(0xD006,0x9127,0xD5B0,0x5AA9), + bn_pack4(0xB81B,0xDD76,0x2170,0x481C), + bn_pack4(0x1F61,0x2970,0xCEE2,0xD7AF), + bn_pack4(0x233B,0xA186,0x515B,0xE7ED), + bn_pack4(0x99B2,0x964F,0xA090,0xC3A2), + bn_pack4(0x287C,0x5947,0x4E6B,0xC05D), + bn_pack4(0x2E8E,0xFC14,0x1FBE,0xCAA6), + bn_pack4(0xDBBB,0xC2DB,0x04DE,0x8EF9), + bn_pack4(0x2583,0xE9CA,0x2AD4,0x4CE8), + bn_pack4(0x1A94,0x6834,0xB615,0x0BDA), + bn_pack4(0x99C3,0x2718,0x6AF4,0xE23C), + bn_pack4(0x8871,0x9A10,0xBDBA,0x5B26), + bn_pack4(0x1A72,0x3C12,0xA787,0xE6D7), + bn_pack4(0x4B82,0xD120,0xA921,0x0801), + bn_pack4(0x43DB,0x5BFC,0xE0FD,0x108E), + bn_pack4(0x08E2,0x4FA0,0x74E5,0xAB31), + bn_pack4(0x7709,0x88C0,0xBAD9,0x46E2), + bn_pack4(0xBBE1,0x1757,0x7A61,0x5D6C), + bn_pack4(0x521F,0x2B18,0x177B,0x200C), + bn_pack4(0xD876,0x0273,0x3EC8,0x6A64), + bn_pack4(0xF12F,0xFA06,0xD98A,0x0864), + bn_pack4(0xCEE3,0xD226,0x1AD2,0xEE6B), + bn_pack4(0x1E8C,0x94E0,0x4A25,0x619D), + bn_pack4(0xABF5,0xAE8C,0xDB09,0x33D7), + bn_pack4(0xB397,0x0F85,0xA6E1,0xE4C7), + bn_pack4(0x8AEA,0x7157,0x5D06,0x0C7D), + bn_pack4(0xECFB,0x8504,0x58DB,0xEF0A), + bn_pack4(0xA855,0x21AB,0xDF1C,0xBA64), + bn_pack4(0xAD33,0x170D,0x0450,0x7A33), + bn_pack4(0x1572,0x8E5A,0x8AAA,0xC42D), + bn_pack4(0x15D2,0x2618,0x98FA,0x0510), + bn_pack4(0x3995,0x497C,0xEA95,0x6AE5), + bn_pack4(0xDE2B,0xCBF6,0x9558,0x1718), + bn_pack4(0xB5C5,0x5DF0,0x6F4C,0x52C9), + bn_pack4(0x9B27,0x83A2,0xEC07,0xA28F), + bn_pack4(0xE39E,0x772C,0x180E,0x8603), + bn_pack4(0x3290,0x5E46,0x2E36,0xCE3B), + bn_pack4(0xF174,0x6C08,0xCA18,0x217C), + bn_pack4(0x670C,0x354E,0x4ABC,0x9804), + bn_pack4(0x9ED5,0x2907,0x7096,0x966D), + bn_pack4(0x1C62,0xF356,0x2085,0x52BB), + bn_pack4(0x8365,0x5D23,0xDCA3,0xAD96), + bn_pack4(0x6916,0x3FA8,0xFD24,0xCF5F), + bn_pack4(0x98DA,0x4836,0x1C55,0xD39A), + bn_pack4(0xC200,0x7CB8,0xA163,0xBF05), + bn_pack4(0x4928,0x6651,0xECE4,0x5B3D), + bn_pack4(0xAE9F,0x2411,0x7C4B,0x1FE6), + bn_pack4(0xEE38,0x6BFB,0x5A89,0x9FA5), + bn_pack4(0x0BFF,0x5CB6,0xF406,0xB7ED), + bn_pack4(0xF44C,0x42E9,0xA637,0xED6B), + bn_pack4(0xE485,0xB576,0x625E,0x7EC6), + bn_pack4(0x4FE1,0x356D,0x6D51,0xC245), + bn_pack4(0x302B,0x0A6D,0xF25F,0x1437), + bn_pack4(0xEF95,0x19B3,0xCD3A,0x431B), + bn_pack4(0x514A,0x0879,0x8E34,0x04DD), + bn_pack4(0x020B,0xBEA6,0x3B13,0x9B22), + bn_pack4(0x2902,0x4E08,0x8A67,0xCC74), + bn_pack4(0xC4C6,0x628B,0x80DC,0x1CD1), + bn_pack4(0xC90F,0xDAA2,0x2168,0xC234), + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF) +}; +static BIGNUM bn_group_6144 = { + bn_group_6144_value, + (sizeof bn_group_6144_value)/sizeof(BN_ULONG), + (sizeof bn_group_6144_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_group_8192_value[] = { + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF), + bn_pack4(0x60C9,0x80DD,0x98ED,0xD3DF), + bn_pack4(0xC81F,0x56E8,0x80B9,0x6E71), + bn_pack4(0x9E30,0x50E2,0x7656,0x94DF), + bn_pack4(0x9558,0xE447,0x5677,0xE9AA), + bn_pack4(0xC919,0x0DA6,0xFC02,0x6E47), + bn_pack4(0x889A,0x002E,0xD5EE,0x382B), + bn_pack4(0x4009,0x438B,0x481C,0x6CD7), + bn_pack4(0x3590,0x46F4,0xEB87,0x9F92), + bn_pack4(0xFAF3,0x6BC3,0x1ECF,0xA268), + bn_pack4(0xB1D5,0x10BD,0x7EE7,0x4D73), + bn_pack4(0xF9AB,0x4819,0x5DED,0x7EA1), + bn_pack4(0x64F3,0x1CC5,0x0846,0x851D), + bn_pack4(0x4597,0xE899,0xA025,0x5DC1), + bn_pack4(0xDF31,0x0EE0,0x74AB,0x6A36), + bn_pack4(0x6D2A,0x13F8,0x3F44,0xF82D), + bn_pack4(0x062B,0x3CF5,0xB3A2,0x78A6), + bn_pack4(0x7968,0x3303,0xED5B,0xDD3A), + bn_pack4(0xFA9D,0x4B7F,0xA2C0,0x87E8), + bn_pack4(0x4BCB,0xC886,0x2F83,0x85DD), + bn_pack4(0x3473,0xFC64,0x6CEA,0x306B), + bn_pack4(0x13EB,0x57A8,0x1A23,0xF0C7), + bn_pack4(0x2222,0x2E04,0xA403,0x7C07), + bn_pack4(0xE3FD,0xB8BE,0xFC84,0x8AD9), + bn_pack4(0x238F,0x16CB,0xE39D,0x652D), + bn_pack4(0x3423,0xB474,0x2BF1,0xC978), + bn_pack4(0x3AAB,0x639C,0x5AE4,0xF568), + bn_pack4(0x2576,0xF693,0x6BA4,0x2466), + bn_pack4(0x741F,0xA7BF,0x8AFC,0x47ED), + bn_pack4(0x3BC8,0x32B6,0x8D9D,0xD300), + bn_pack4(0xD8BE,0xC4D0,0x73B9,0x31BA), + bn_pack4(0x3877,0x7CB6,0xA932,0xDF8C), + bn_pack4(0x74A3,0x926F,0x12FE,0xE5E4), + bn_pack4(0xE694,0xF91E,0x6DBE,0x1159), + bn_pack4(0x12BF,0x2D5B,0x0B74,0x74D6), + bn_pack4(0x043E,0x8F66,0x3F48,0x60EE), + bn_pack4(0x387F,0xE8D7,0x6E3C,0x0468), + bn_pack4(0xDA56,0xC9EC,0x2EF2,0x9632), + bn_pack4(0xEB19,0xCCB1,0xA313,0xD55C), + bn_pack4(0xF550,0xAA3D,0x8A1F,0xBFF0), + bn_pack4(0x06A1,0xD58B,0xB7C5,0xDA76), + bn_pack4(0xA797,0x15EE,0xF29B,0xE328), + bn_pack4(0x14CC,0x5ED2,0x0F80,0x37E0), + bn_pack4(0xCC8F,0x6D7E,0xBF48,0xE1D8), + bn_pack4(0x4BD4,0x07B2,0x2B41,0x54AA), + bn_pack4(0x0F1D,0x45B7,0xFF58,0x5AC5), + bn_pack4(0x23A9,0x7A7E,0x36CC,0x88BE), + bn_pack4(0x59E7,0xC97F,0xBEC7,0xE8F3), + bn_pack4(0xB5A8,0x4031,0x900B,0x1C9E), + bn_pack4(0xD55E,0x702F,0x4698,0x0C82), + bn_pack4(0xF482,0xD7CE,0x6E74,0xFEF6), + bn_pack4(0xF032,0xEA15,0xD172,0x1D03), + bn_pack4(0x5983,0xCA01,0xC64B,0x92EC), + bn_pack4(0x6FB8,0xF401,0x378C,0xD2BF), + bn_pack4(0x3320,0x5151,0x2BD7,0xAF42), + bn_pack4(0xDB7F,0x1447,0xE6CC,0x254B), + bn_pack4(0x44CE,0x6CBA,0xCED4,0xBB1B), + bn_pack4(0xDA3E,0xDBEB,0xCF9B,0x14ED), + bn_pack4(0x1797,0x27B0,0x865A,0x8918), + bn_pack4(0xB06A,0x53ED,0x9027,0xD831), + bn_pack4(0xE5DB,0x382F,0x4130,0x01AE), + bn_pack4(0xF8FF,0x9406,0xAD9E,0x530E), + bn_pack4(0xC975,0x1E76,0x3DBA,0x37BD), + bn_pack4(0xC1D4,0xDCB2,0x6026,0x46DE), + bn_pack4(0x36C3,0xFAB4,0xD27C,0x7026), + bn_pack4(0x4DF4,0x35C9,0x3402,0x8492), + bn_pack4(0x86FF,0xB7DC,0x90A6,0xC08F), + bn_pack4(0x93B4,0xEA98,0x8D8F,0xDDC1), + bn_pack4(0xD006,0x9127,0xD5B0,0x5AA9), + bn_pack4(0xB81B,0xDD76,0x2170,0x481C), + bn_pack4(0x1F61,0x2970,0xCEE2,0xD7AF), + bn_pack4(0x233B,0xA186,0x515B,0xE7ED), + bn_pack4(0x99B2,0x964F,0xA090,0xC3A2), + bn_pack4(0x287C,0x5947,0x4E6B,0xC05D), + bn_pack4(0x2E8E,0xFC14,0x1FBE,0xCAA6), + bn_pack4(0xDBBB,0xC2DB,0x04DE,0x8EF9), + bn_pack4(0x2583,0xE9CA,0x2AD4,0x4CE8), + bn_pack4(0x1A94,0x6834,0xB615,0x0BDA), + bn_pack4(0x99C3,0x2718,0x6AF4,0xE23C), + bn_pack4(0x8871,0x9A10,0xBDBA,0x5B26), + bn_pack4(0x1A72,0x3C12,0xA787,0xE6D7), + bn_pack4(0x4B82,0xD120,0xA921,0x0801), + bn_pack4(0x43DB,0x5BFC,0xE0FD,0x108E), + bn_pack4(0x08E2,0x4FA0,0x74E5,0xAB31), + bn_pack4(0x7709,0x88C0,0xBAD9,0x46E2), + bn_pack4(0xBBE1,0x1757,0x7A61,0x5D6C), + bn_pack4(0x521F,0x2B18,0x177B,0x200C), + bn_pack4(0xD876,0x0273,0x3EC8,0x6A64), + bn_pack4(0xF12F,0xFA06,0xD98A,0x0864), + bn_pack4(0xCEE3,0xD226,0x1AD2,0xEE6B), + bn_pack4(0x1E8C,0x94E0,0x4A25,0x619D), + bn_pack4(0xABF5,0xAE8C,0xDB09,0x33D7), + bn_pack4(0xB397,0x0F85,0xA6E1,0xE4C7), + bn_pack4(0x8AEA,0x7157,0x5D06,0x0C7D), + bn_pack4(0xECFB,0x8504,0x58DB,0xEF0A), + bn_pack4(0xA855,0x21AB,0xDF1C,0xBA64), + bn_pack4(0xAD33,0x170D,0x0450,0x7A33), + bn_pack4(0x1572,0x8E5A,0x8AAA,0xC42D), + bn_pack4(0x15D2,0x2618,0x98FA,0x0510), + bn_pack4(0x3995,0x497C,0xEA95,0x6AE5), + bn_pack4(0xDE2B,0xCBF6,0x9558,0x1718), + bn_pack4(0xB5C5,0x5DF0,0x6F4C,0x52C9), + bn_pack4(0x9B27,0x83A2,0xEC07,0xA28F), + bn_pack4(0xE39E,0x772C,0x180E,0x8603), + bn_pack4(0x3290,0x5E46,0x2E36,0xCE3B), + bn_pack4(0xF174,0x6C08,0xCA18,0x217C), + bn_pack4(0x670C,0x354E,0x4ABC,0x9804), + bn_pack4(0x9ED5,0x2907,0x7096,0x966D), + bn_pack4(0x1C62,0xF356,0x2085,0x52BB), + bn_pack4(0x8365,0x5D23,0xDCA3,0xAD96), + bn_pack4(0x6916,0x3FA8,0xFD24,0xCF5F), + bn_pack4(0x98DA,0x4836,0x1C55,0xD39A), + bn_pack4(0xC200,0x7CB8,0xA163,0xBF05), + bn_pack4(0x4928,0x6651,0xECE4,0x5B3D), + bn_pack4(0xAE9F,0x2411,0x7C4B,0x1FE6), + bn_pack4(0xEE38,0x6BFB,0x5A89,0x9FA5), + bn_pack4(0x0BFF,0x5CB6,0xF406,0xB7ED), + bn_pack4(0xF44C,0x42E9,0xA637,0xED6B), + bn_pack4(0xE485,0xB576,0x625E,0x7EC6), + bn_pack4(0x4FE1,0x356D,0x6D51,0xC245), + bn_pack4(0x302B,0x0A6D,0xF25F,0x1437), + bn_pack4(0xEF95,0x19B3,0xCD3A,0x431B), + bn_pack4(0x514A,0x0879,0x8E34,0x04DD), + bn_pack4(0x020B,0xBEA6,0x3B13,0x9B22), + bn_pack4(0x2902,0x4E08,0x8A67,0xCC74), + bn_pack4(0xC4C6,0x628B,0x80DC,0x1CD1), + bn_pack4(0xC90F,0xDAA2,0x2168,0xC234), + bn_pack4(0xFFFF,0xFFFF,0xFFFF,0xFFFF) +}; +static BIGNUM bn_group_8192 = { + bn_group_8192_value, + (sizeof bn_group_8192_value)/sizeof(BN_ULONG), + (sizeof bn_group_8192_value)/sizeof(BN_ULONG), + 0, + BN_FLG_STATIC_DATA +}; + +static BN_ULONG bn_generator_19_value[] = {19} ; +static BIGNUM bn_generator_19 = { + bn_generator_19_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; +static BN_ULONG bn_generator_5_value[] = {5} ; +static BIGNUM bn_generator_5 = { + bn_generator_5_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; +static BN_ULONG bn_generator_2_value[] = {2} ; +static BIGNUM bn_generator_2 = { + bn_generator_2_value, + 1, + 1, + 0, + BN_FLG_STATIC_DATA +}; + +static SRP_gN knowngN[] = { + {"8192",&bn_generator_19 , &bn_group_8192}, + {"6144",&bn_generator_5 , &bn_group_6144}, + {"4096",&bn_generator_5 , &bn_group_4096}, + {"3072",&bn_generator_5 , &bn_group_3072}, + {"2048",&bn_generator_2 , &bn_group_2048}, + {"1536",&bn_generator_2 , &bn_group_1536}, + {"1024",&bn_generator_2 , &bn_group_1024}, +}; +#define KNOWN_GN_NUMBER sizeof(knowngN) / sizeof(SRP_gN) + +/* end of generated data */ diff --git a/app/openssl/crypto/srp/srp_lcl.h b/app/openssl/crypto/srp/srp_lcl.h new file mode 100644 index 00000000..42bda3f1 --- /dev/null +++ b/app/openssl/crypto/srp/srp_lcl.h @@ -0,0 +1,83 @@ +/* crypto/srp/srp_lcl.h */ +/* Written by Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#ifndef HEADER_SRP_LCL_H +#define HEADER_SRP_LCL_H + +#include +#include + +#if 0 +#define srp_bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \ + fprintf(stderr,"\n");} +#else +#define srp_bn_print(a) +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/app/openssl/crypto/srp/srp_lib.c b/app/openssl/crypto/srp/srp_lib.c new file mode 100644 index 00000000..7c1dcc51 --- /dev/null +++ b/app/openssl/crypto/srp/srp_lib.c @@ -0,0 +1,361 @@ +/* crypto/srp/srp_lib.c */ +/* Written by Christophe Renou (christophe.renou@edelweb.fr) with + * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#ifndef OPENSSL_NO_SRP +#include "cryptlib.h" +#include "srp_lcl.h" +#include +#include + +#if (BN_BYTES == 8) +# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +# define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64) +# elif defined(__arch64__) +# define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL) +# else +# define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL) +# endif +#elif (BN_BYTES == 4) +# define bn_pack4(a1,a2,a3,a4) ((a3##UL<<16)|a4##UL), ((a1##UL<<16)|a2##UL) +#else +# error "unsupported BN_BYTES" +#endif + + +#include "srp_grps.h" + +static BIGNUM *srp_Calc_k(BIGNUM *N, BIGNUM *g) + { + /* k = SHA1(N | PAD(g)) -- tls-srp draft 8 */ + + unsigned char digest[SHA_DIGEST_LENGTH]; + unsigned char *tmp; + EVP_MD_CTX ctxt; + int longg ; + int longN = BN_num_bytes(N); + + if ((tmp = OPENSSL_malloc(longN)) == NULL) + return NULL; + BN_bn2bin(N,tmp) ; + + EVP_MD_CTX_init(&ctxt); + EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); + EVP_DigestUpdate(&ctxt, tmp, longN); + + memset(tmp, 0, longN); + longg = BN_bn2bin(g,tmp) ; + /* use the zeros behind to pad on left */ + EVP_DigestUpdate(&ctxt, tmp + longg, longN-longg); + EVP_DigestUpdate(&ctxt, tmp, longg); + OPENSSL_free(tmp); + + EVP_DigestFinal_ex(&ctxt, digest, NULL); + EVP_MD_CTX_cleanup(&ctxt); + return BN_bin2bn(digest, sizeof(digest), NULL); + } + +BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N) + { + /* k = SHA1(PAD(A) || PAD(B) ) -- tls-srp draft 8 */ + + BIGNUM *u; + unsigned char cu[SHA_DIGEST_LENGTH]; + unsigned char *cAB; + EVP_MD_CTX ctxt; + int longN; + if ((A == NULL) ||(B == NULL) || (N == NULL)) + return NULL; + + longN= BN_num_bytes(N); + + if ((cAB = OPENSSL_malloc(2*longN)) == NULL) + return NULL; + + memset(cAB, 0, longN); + + EVP_MD_CTX_init(&ctxt); + EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); + EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(A,cAB+longN), longN); + EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(B,cAB+longN), longN); + OPENSSL_free(cAB); + EVP_DigestFinal_ex(&ctxt, cu, NULL); + EVP_MD_CTX_cleanup(&ctxt); + + if (!(u = BN_bin2bn(cu, sizeof(cu), NULL))) + return NULL; + if (!BN_is_zero(u)) + return u; + BN_free(u); + return NULL; +} + +BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N) + { + BIGNUM *tmp = NULL, *S = NULL; + BN_CTX *bn_ctx; + + if (u == NULL || A == NULL || v == NULL || b == NULL || N == NULL) + return NULL; + + if ((bn_ctx = BN_CTX_new()) == NULL || + (tmp = BN_new()) == NULL || + (S = BN_new()) == NULL ) + goto err; + + /* S = (A*v**u) ** b */ + + if (!BN_mod_exp(tmp,v,u,N,bn_ctx)) + goto err; + if (!BN_mod_mul(tmp,A,tmp,N,bn_ctx)) + goto err; + if (!BN_mod_exp(S,tmp,b,N,bn_ctx)) + goto err; +err: + BN_CTX_free(bn_ctx); + BN_clear_free(tmp); + return S; + } + +BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v) + { + BIGNUM *kv = NULL, *gb = NULL; + BIGNUM *B = NULL, *k = NULL; + BN_CTX *bn_ctx; + + if (b == NULL || N == NULL || g == NULL || v == NULL || + (bn_ctx = BN_CTX_new()) == NULL) + return NULL; + + if ( (kv = BN_new()) == NULL || + (gb = BN_new()) == NULL || + (B = BN_new())== NULL) + goto err; + + /* B = g**b + k*v */ + + if (!BN_mod_exp(gb,g,b,N,bn_ctx) || + !(k = srp_Calc_k(N,g)) || + !BN_mod_mul(kv,v,k,N,bn_ctx) || + !BN_mod_add(B,gb,kv,N,bn_ctx)) + { + BN_free(B); + B = NULL; + } +err: + BN_CTX_free(bn_ctx); + BN_clear_free(kv); + BN_clear_free(gb); + BN_free(k); + return B; + } + +BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass) + { + unsigned char dig[SHA_DIGEST_LENGTH]; + EVP_MD_CTX ctxt; + unsigned char *cs; + + if ((s == NULL) || + (user == NULL) || + (pass == NULL)) + return NULL; + + if ((cs = OPENSSL_malloc(BN_num_bytes(s))) == NULL) + return NULL; + + EVP_MD_CTX_init(&ctxt); + EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); + EVP_DigestUpdate(&ctxt, user, strlen(user)); + EVP_DigestUpdate(&ctxt, ":", 1); + EVP_DigestUpdate(&ctxt, pass, strlen(pass)); + EVP_DigestFinal_ex(&ctxt, dig, NULL); + + EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); + BN_bn2bin(s,cs); + EVP_DigestUpdate(&ctxt, cs, BN_num_bytes(s)); + OPENSSL_free(cs); + EVP_DigestUpdate(&ctxt, dig, sizeof(dig)); + EVP_DigestFinal_ex(&ctxt, dig, NULL); + EVP_MD_CTX_cleanup(&ctxt); + + return BN_bin2bn(dig, sizeof(dig), NULL); + } + +BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g) + { + BN_CTX *bn_ctx; + BIGNUM * A = NULL; + + if (a == NULL || N == NULL || g == NULL || + (bn_ctx = BN_CTX_new()) == NULL) + return NULL; + + if ((A = BN_new()) != NULL && + !BN_mod_exp(A,g,a,N,bn_ctx)) + { + BN_free(A); + A = NULL; + } + BN_CTX_free(bn_ctx); + return A; + } + + +BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u) + { + BIGNUM *tmp = NULL, *tmp2 = NULL, *tmp3 = NULL , *k = NULL, *K = NULL; + BN_CTX *bn_ctx; + + if (u == NULL || B == NULL || N == NULL || g == NULL || x == NULL || a == NULL || + (bn_ctx = BN_CTX_new()) == NULL) + return NULL; + + if ((tmp = BN_new()) == NULL || + (tmp2 = BN_new())== NULL || + (tmp3 = BN_new())== NULL || + (K = BN_new()) == NULL) + goto err; + + if (!BN_mod_exp(tmp,g,x,N,bn_ctx)) + goto err; + if (!(k = srp_Calc_k(N,g))) + goto err; + if (!BN_mod_mul(tmp2,tmp,k,N,bn_ctx)) + goto err; + if (!BN_mod_sub(tmp,B,tmp2,N,bn_ctx)) + goto err; + + if (!BN_mod_mul(tmp3,u,x,N,bn_ctx)) + goto err; + if (!BN_mod_add(tmp2,a,tmp3,N,bn_ctx)) + goto err; + if (!BN_mod_exp(K,tmp,tmp2,N,bn_ctx)) + goto err; + +err : + BN_CTX_free(bn_ctx); + BN_clear_free(tmp); + BN_clear_free(tmp2); + BN_clear_free(tmp3); + BN_free(k); + return K; + } + +int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N) + { + BIGNUM *r; + BN_CTX *bn_ctx; + int ret = 0; + + if (B == NULL || N == NULL || + (bn_ctx = BN_CTX_new()) == NULL) + return 0; + + if ((r = BN_new()) == NULL) + goto err; + /* Checks if B % N == 0 */ + if (!BN_nnmod(r,B,N,bn_ctx)) + goto err; + ret = !BN_is_zero(r); +err: + BN_CTX_free(bn_ctx); + BN_free(r); + return ret; + } + +int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N) + { + /* Checks if A % N == 0 */ + return SRP_Verify_B_mod_N(A,N) ; + } + + +/* Check if G and N are kwown parameters. + The values have been generated from the ietf-tls-srp draft version 8 +*/ +char *SRP_check_known_gN_param(BIGNUM *g, BIGNUM *N) + { + size_t i; + if ((g == NULL) || (N == NULL)) + return 0; + + srp_bn_print(g); + srp_bn_print(N); + + for(i = 0; i < KNOWN_GN_NUMBER; i++) + { + if (BN_cmp(knowngN[i].g, g) == 0 && BN_cmp(knowngN[i].N, N) == 0) + return knowngN[i].id; + } + return NULL; + } + +SRP_gN *SRP_get_default_gN(const char *id) + { + size_t i; + + if (id == NULL) + return knowngN; + for(i = 0; i < KNOWN_GN_NUMBER; i++) + { + if (strcmp(knowngN[i].id, id)==0) + return knowngN + i; + } + return NULL; + } +#endif diff --git a/app/openssl/crypto/srp/srp_vfy.c b/app/openssl/crypto/srp/srp_vfy.c new file mode 100644 index 00000000..4a3d13ed --- /dev/null +++ b/app/openssl/crypto/srp/srp_vfy.c @@ -0,0 +1,658 @@ +/* crypto/srp/srp_vfy.c */ +/* Written by Christophe Renou (christophe.renou@edelweb.fr) with + * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#ifndef OPENSSL_NO_SRP +#include "cryptlib.h" +#include "srp_lcl.h" +#include +#include +#include +#include +#include + +#define SRP_RANDOM_SALT_LEN 20 +#define MAX_LEN 2500 + +static char b64table[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz./"; + +/* the following two conversion routines have been inspired by code from Stanford */ + +/* + * Convert a base64 string into raw byte array representation. + */ +static int t_fromb64(unsigned char *a, const char *src) + { + char *loc; + int i, j; + int size; + + while(*src && (*src == ' ' || *src == '\t' || *src == '\n')) + ++src; + size = strlen(src); + i = 0; + while(i < size) + { + loc = strchr(b64table, src[i]); + if(loc == (char *) 0) break; + else a[i] = loc - b64table; + ++i; + } + size = i; + i = size - 1; + j = size; + while(1) + { + a[j] = a[i]; + if(--i < 0) break; + a[j] |= (a[i] & 3) << 6; + --j; + a[j] = (unsigned char) ((a[i] & 0x3c) >> 2); + if(--i < 0) break; + a[j] |= (a[i] & 0xf) << 4; + --j; + a[j] = (unsigned char) ((a[i] & 0x30) >> 4); + if(--i < 0) break; + a[j] |= (a[i] << 2); + + a[--j] = 0; + if(--i < 0) break; + } + while(a[j] == 0 && j <= size) ++j; + i = 0; + while (j <= size) a[i++] = a[j++]; + return i; + } + + +/* + * Convert a raw byte string into a null-terminated base64 ASCII string. + */ +static char *t_tob64(char *dst, const unsigned char *src, int size) + { + int c, pos = size % 3; + unsigned char b0 = 0, b1 = 0, b2 = 0, notleading = 0; + char *olddst = dst; + + switch(pos) + { + case 1: + b2 = src[0]; + break; + case 2: + b1 = src[0]; + b2 = src[1]; + break; + } + + while(1) + { + c = (b0 & 0xfc) >> 2; + if(notleading || c != 0) + { + *dst++ = b64table[c]; + notleading = 1; + } + c = ((b0 & 3) << 4) | ((b1 & 0xf0) >> 4); + if(notleading || c != 0) + { + *dst++ = b64table[c]; + notleading = 1; + } + c = ((b1 & 0xf) << 2) | ((b2 & 0xc0) >> 6); + if(notleading || c != 0) + { + *dst++ = b64table[c]; + notleading = 1; + } + c = b2 & 0x3f; + if(notleading || c != 0) + { + *dst++ = b64table[c]; + notleading = 1; + } + if(pos >= size) break; + else + { + b0 = src[pos++]; + b1 = src[pos++]; + b2 = src[pos++]; + } + } + + *dst++ = '\0'; + return olddst; + } + +static void SRP_user_pwd_free(SRP_user_pwd *user_pwd) + { + if (user_pwd == NULL) + return; + BN_free(user_pwd->s); + BN_clear_free(user_pwd->v); + OPENSSL_free(user_pwd->id); + OPENSSL_free(user_pwd->info); + OPENSSL_free(user_pwd); + } + +static SRP_user_pwd *SRP_user_pwd_new() + { + SRP_user_pwd *ret = OPENSSL_malloc(sizeof(SRP_user_pwd)); + if (ret == NULL) + return NULL; + ret->N = NULL; + ret->g = NULL; + ret->s = NULL; + ret->v = NULL; + ret->id = NULL ; + ret->info = NULL; + return ret; + } + +static void SRP_user_pwd_set_gN(SRP_user_pwd *vinfo, const BIGNUM *g, + const BIGNUM *N) + { + vinfo->N = N; + vinfo->g = g; + } + +static int SRP_user_pwd_set_ids(SRP_user_pwd *vinfo, const char *id, + const char *info) + { + if (id != NULL && NULL == (vinfo->id = BUF_strdup(id))) + return 0; + return (info == NULL || NULL != (vinfo->info = BUF_strdup(info))) ; + } + +static int SRP_user_pwd_set_sv(SRP_user_pwd *vinfo, const char *s, + const char *v) + { + unsigned char tmp[MAX_LEN]; + int len; + + if (strlen(s) > MAX_LEN || strlen(v) > MAX_LEN) + return 0; + len = t_fromb64(tmp, v); + if (NULL == (vinfo->v = BN_bin2bn(tmp, len, NULL)) ) + return 0; + len = t_fromb64(tmp, s); + return ((vinfo->s = BN_bin2bn(tmp, len, NULL)) != NULL) ; + } + +static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v) + { + vinfo->v = v; + vinfo->s = s; + return (vinfo->s != NULL && vinfo->v != NULL) ; + } + +SRP_VBASE *SRP_VBASE_new(char *seed_key) + { + SRP_VBASE *vb = (SRP_VBASE *) OPENSSL_malloc(sizeof(SRP_VBASE)); + + if (vb == NULL) + return NULL; + if (!(vb->users_pwd = sk_SRP_user_pwd_new_null()) || + !(vb->gN_cache = sk_SRP_gN_cache_new_null())) + { + OPENSSL_free(vb); + return NULL; + } + vb->default_g = NULL; + vb->default_N = NULL; + vb->seed_key = NULL; + if ((seed_key != NULL) && + (vb->seed_key = BUF_strdup(seed_key)) == NULL) + { + sk_SRP_user_pwd_free(vb->users_pwd); + sk_SRP_gN_cache_free(vb->gN_cache); + OPENSSL_free(vb); + return NULL; + } + return vb; + } + + +int SRP_VBASE_free(SRP_VBASE *vb) + { + sk_SRP_user_pwd_pop_free(vb->users_pwd,SRP_user_pwd_free); + sk_SRP_gN_cache_free(vb->gN_cache); + OPENSSL_free(vb->seed_key); + OPENSSL_free(vb); + return 0; + } + + +static SRP_gN_cache *SRP_gN_new_init(const char *ch) + { + unsigned char tmp[MAX_LEN]; + int len; + + SRP_gN_cache *newgN = (SRP_gN_cache *)OPENSSL_malloc(sizeof(SRP_gN_cache)); + if (newgN == NULL) + return NULL; + + if ((newgN->b64_bn = BUF_strdup(ch)) == NULL) + goto err; + + len = t_fromb64(tmp, ch); + if ((newgN->bn = BN_bin2bn(tmp, len, NULL))) + return newgN; + + OPENSSL_free(newgN->b64_bn); +err: + OPENSSL_free(newgN); + return NULL; + } + + +static void SRP_gN_free(SRP_gN_cache *gN_cache) + { + if (gN_cache == NULL) + return; + OPENSSL_free(gN_cache->b64_bn); + BN_free(gN_cache->bn); + OPENSSL_free(gN_cache); + } + +static SRP_gN *SRP_get_gN_by_id(const char *id, STACK_OF(SRP_gN) *gN_tab) + { + int i; + + SRP_gN *gN; + if (gN_tab != NULL) + for(i = 0; i < sk_SRP_gN_num(gN_tab); i++) + { + gN = sk_SRP_gN_value(gN_tab, i); + if (gN && (id == NULL || strcmp(gN->id,id)==0)) + return gN; + } + + return SRP_get_default_gN(id); + } + +static BIGNUM *SRP_gN_place_bn(STACK_OF(SRP_gN_cache) *gN_cache, char *ch) + { + int i; + if (gN_cache == NULL) + return NULL; + + /* search if we have already one... */ + for(i = 0; i < sk_SRP_gN_cache_num(gN_cache); i++) + { + SRP_gN_cache *cache = sk_SRP_gN_cache_value(gN_cache, i); + if (strcmp(cache->b64_bn,ch)==0) + return cache->bn; + } + { /* it is the first time that we find it */ + SRP_gN_cache *newgN = SRP_gN_new_init(ch); + if (newgN) + { + if (sk_SRP_gN_cache_insert(gN_cache,newgN,0)>0) + return newgN->bn; + SRP_gN_free(newgN); + } + } + return NULL; + } + +/* this function parses verifier file. Format is: + * string(index):base64(N):base64(g):0 + * string(username):base64(v):base64(salt):int(index) + */ + + +int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file) + { + int error_code ; + STACK_OF(SRP_gN) *SRP_gN_tab = sk_SRP_gN_new_null(); + char *last_index = NULL; + int i; + char **pp; + + SRP_gN *gN = NULL; + SRP_user_pwd *user_pwd = NULL ; + + TXT_DB *tmpdb = NULL; + BIO *in = BIO_new(BIO_s_file()); + + error_code = SRP_ERR_OPEN_FILE; + + if (in == NULL || BIO_read_filename(in,verifier_file) <= 0) + goto err; + + error_code = SRP_ERR_VBASE_INCOMPLETE_FILE; + + if ((tmpdb =TXT_DB_read(in,DB_NUMBER)) == NULL) + goto err; + + error_code = SRP_ERR_MEMORY; + + + if (vb->seed_key) + { + last_index = SRP_get_default_gN(NULL)->id; + } + for (i = 0; i < sk_OPENSSL_PSTRING_num(tmpdb->data); i++) + { + pp = sk_OPENSSL_PSTRING_value(tmpdb->data,i); + if (pp[DB_srptype][0] == DB_SRP_INDEX) + { + /*we add this couple in the internal Stack */ + + if ((gN = (SRP_gN *)OPENSSL_malloc(sizeof(SRP_gN))) == NULL) + goto err; + + if (!(gN->id = BUF_strdup(pp[DB_srpid])) + || !(gN->N = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpverifier])) + || !(gN->g = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpsalt])) + || sk_SRP_gN_insert(SRP_gN_tab,gN,0) == 0) + goto err; + + gN = NULL; + + if (vb->seed_key != NULL) + { + last_index = pp[DB_srpid]; + } + } + else if (pp[DB_srptype][0] == DB_SRP_VALID) + { + /* it is a user .... */ + SRP_gN *lgN; + if ((lgN = SRP_get_gN_by_id(pp[DB_srpgN],SRP_gN_tab))!=NULL) + { + error_code = SRP_ERR_MEMORY; + if ((user_pwd = SRP_user_pwd_new()) == NULL) + goto err; + + SRP_user_pwd_set_gN(user_pwd,lgN->g,lgN->N); + if (!SRP_user_pwd_set_ids(user_pwd, pp[DB_srpid],pp[DB_srpinfo])) + goto err; + + error_code = SRP_ERR_VBASE_BN_LIB; + if (!SRP_user_pwd_set_sv(user_pwd, pp[DB_srpsalt],pp[DB_srpverifier])) + goto err; + + if (sk_SRP_user_pwd_insert(vb->users_pwd, user_pwd, 0) == 0) + goto err; + user_pwd = NULL; /* abandon responsability */ + } + } + } + + if (last_index != NULL) + { + /* this means that we want to simulate a default user */ + + if (((gN = SRP_get_gN_by_id(last_index,SRP_gN_tab))==NULL)) + { + error_code = SRP_ERR_VBASE_BN_LIB; + goto err; + } + vb->default_g = gN->g ; + vb->default_N = gN->N ; + gN = NULL ; + } + error_code = SRP_NO_ERROR; + + err: + /* there may be still some leaks to fix, if this fails, the application terminates most likely */ + + if (gN != NULL) + { + OPENSSL_free(gN->id); + OPENSSL_free(gN); + } + + SRP_user_pwd_free(user_pwd); + + if (tmpdb) TXT_DB_free(tmpdb); + if (in) BIO_free_all(in); + + sk_SRP_gN_free(SRP_gN_tab); + + return error_code; + + } + + +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) + { + int i; + SRP_user_pwd *user; + unsigned char digv[SHA_DIGEST_LENGTH]; + unsigned char digs[SHA_DIGEST_LENGTH]; + EVP_MD_CTX ctxt; + + if (vb == NULL) + return NULL; + for(i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++) + { + user = sk_SRP_user_pwd_value(vb->users_pwd, i); + if (strcmp(user->id,username)==0) + return user; + } + if ((vb->seed_key == NULL) || + (vb->default_g == NULL) || + (vb->default_N == NULL)) + return NULL; + +/* if the user is unknown we set parameters as well if we have a seed_key */ + + if ((user = SRP_user_pwd_new()) == NULL) + return NULL; + + SRP_user_pwd_set_gN(user,vb->default_g,vb->default_N); + + if (!SRP_user_pwd_set_ids(user,username,NULL)) + goto err; + + RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH); + EVP_MD_CTX_init(&ctxt); + EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); + EVP_DigestUpdate(&ctxt, vb->seed_key, strlen(vb->seed_key)); + EVP_DigestUpdate(&ctxt, username, strlen(username)); + EVP_DigestFinal_ex(&ctxt, digs, NULL); + EVP_MD_CTX_cleanup(&ctxt); + if (SRP_user_pwd_set_sv_BN(user, BN_bin2bn(digs,SHA_DIGEST_LENGTH,NULL), BN_bin2bn(digv,SHA_DIGEST_LENGTH, NULL))) + return user; + +err: SRP_user_pwd_free(user); + return NULL; + } + + +/* + create a verifier (*salt,*verifier,g and N are in base64) +*/ +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g) + { + int len; + char * result=NULL; + char *vf; + BIGNUM *N_bn = NULL, *g_bn = NULL, *s = NULL, *v = NULL; + unsigned char tmp[MAX_LEN]; + unsigned char tmp2[MAX_LEN]; + char * defgNid = NULL; + + if ((user == NULL)|| + (pass == NULL)|| + (salt == NULL)|| + (verifier == NULL)) + goto err; + + if (N) + { + if (!(len = t_fromb64(tmp, N))) goto err; + N_bn = BN_bin2bn(tmp, len, NULL); + if (!(len = t_fromb64(tmp, g))) goto err; + g_bn = BN_bin2bn(tmp, len, NULL); + defgNid = "*"; + } + else + { + SRP_gN * gN = SRP_get_gN_by_id(g, NULL) ; + if (gN == NULL) + goto err; + N_bn = gN->N; + g_bn = gN->g; + defgNid = gN->id; + } + + if (*salt == NULL) + { + RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN); + + s = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); + } + else + { + if (!(len = t_fromb64(tmp2, *salt))) + goto err; + s = BN_bin2bn(tmp2, len, NULL); + } + + + if(!SRP_create_verifier_BN(user, pass, &s, &v, N_bn, g_bn)) goto err; + + BN_bn2bin(v,tmp); + if (((vf = OPENSSL_malloc(BN_num_bytes(v)*2)) == NULL)) + goto err; + t_tob64(vf, tmp, BN_num_bytes(v)); + + *verifier = vf; + if (*salt == NULL) + { + char *tmp_salt; + + if ((tmp_salt = OPENSSL_malloc(SRP_RANDOM_SALT_LEN * 2)) == NULL) + { + OPENSSL_free(vf); + goto err; + } + t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN); + *salt = tmp_salt; + } + + result=defgNid; + +err: + if(N) + { + BN_free(N_bn); + BN_free(g_bn); + } + return result; + } + +/* + create a verifier (*salt,*verifier,g and N are BIGNUMs) +*/ +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g) + { + int result=0; + BIGNUM *x = NULL; + BN_CTX *bn_ctx = BN_CTX_new(); + unsigned char tmp2[MAX_LEN]; + + if ((user == NULL)|| + (pass == NULL)|| + (salt == NULL)|| + (verifier == NULL)|| + (N == NULL)|| + (g == NULL)|| + (bn_ctx == NULL)) + goto err; + + srp_bn_print(N); + srp_bn_print(g); + + if (*salt == NULL) + { + RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN); + + *salt = BN_bin2bn(tmp2,SRP_RANDOM_SALT_LEN,NULL); + } + + x = SRP_Calc_x(*salt,user,pass); + + *verifier = BN_new(); + if(*verifier == NULL) goto err; + + if (!BN_mod_exp(*verifier,g,x,N,bn_ctx)) + { + BN_clear_free(*verifier); + goto err; + } + + srp_bn_print(*verifier); + + result=1; + +err: + + BN_clear_free(x); + BN_CTX_free(bn_ctx); + return result; + } + + + +#endif diff --git a/app/openssl/crypto/stack/safestack.h b/app/openssl/crypto/stack/safestack.h index 3e76aa58..ea3aa0d8 100644 --- a/app/openssl/crypto/stack/safestack.h +++ b/app/openssl/crypto/stack/safestack.h @@ -1459,6 +1459,94 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_POLICY_MAPPING_sort(st) SKM_sk_sort(POLICY_MAPPING, (st)) #define sk_POLICY_MAPPING_is_sorted(st) SKM_sk_is_sorted(POLICY_MAPPING, (st)) +#define sk_SRP_gN_new(cmp) SKM_sk_new(SRP_gN, (cmp)) +#define sk_SRP_gN_new_null() SKM_sk_new_null(SRP_gN) +#define sk_SRP_gN_free(st) SKM_sk_free(SRP_gN, (st)) +#define sk_SRP_gN_num(st) SKM_sk_num(SRP_gN, (st)) +#define sk_SRP_gN_value(st, i) SKM_sk_value(SRP_gN, (st), (i)) +#define sk_SRP_gN_set(st, i, val) SKM_sk_set(SRP_gN, (st), (i), (val)) +#define sk_SRP_gN_zero(st) SKM_sk_zero(SRP_gN, (st)) +#define sk_SRP_gN_push(st, val) SKM_sk_push(SRP_gN, (st), (val)) +#define sk_SRP_gN_unshift(st, val) SKM_sk_unshift(SRP_gN, (st), (val)) +#define sk_SRP_gN_find(st, val) SKM_sk_find(SRP_gN, (st), (val)) +#define sk_SRP_gN_find_ex(st, val) SKM_sk_find_ex(SRP_gN, (st), (val)) +#define sk_SRP_gN_delete(st, i) SKM_sk_delete(SRP_gN, (st), (i)) +#define sk_SRP_gN_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_gN, (st), (ptr)) +#define sk_SRP_gN_insert(st, val, i) SKM_sk_insert(SRP_gN, (st), (val), (i)) +#define sk_SRP_gN_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN, (st), (cmp)) +#define sk_SRP_gN_dup(st) SKM_sk_dup(SRP_gN, st) +#define sk_SRP_gN_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN, (st), (free_func)) +#define sk_SRP_gN_shift(st) SKM_sk_shift(SRP_gN, (st)) +#define sk_SRP_gN_pop(st) SKM_sk_pop(SRP_gN, (st)) +#define sk_SRP_gN_sort(st) SKM_sk_sort(SRP_gN, (st)) +#define sk_SRP_gN_is_sorted(st) SKM_sk_is_sorted(SRP_gN, (st)) + +#define sk_SRP_gN_cache_new(cmp) SKM_sk_new(SRP_gN_cache, (cmp)) +#define sk_SRP_gN_cache_new_null() SKM_sk_new_null(SRP_gN_cache) +#define sk_SRP_gN_cache_free(st) SKM_sk_free(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_num(st) SKM_sk_num(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_value(st, i) SKM_sk_value(SRP_gN_cache, (st), (i)) +#define sk_SRP_gN_cache_set(st, i, val) SKM_sk_set(SRP_gN_cache, (st), (i), (val)) +#define sk_SRP_gN_cache_zero(st) SKM_sk_zero(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_push(st, val) SKM_sk_push(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_unshift(st, val) SKM_sk_unshift(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_find(st, val) SKM_sk_find(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_find_ex(st, val) SKM_sk_find_ex(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_delete(st, i) SKM_sk_delete(SRP_gN_cache, (st), (i)) +#define sk_SRP_gN_cache_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_gN_cache, (st), (ptr)) +#define sk_SRP_gN_cache_insert(st, val, i) SKM_sk_insert(SRP_gN_cache, (st), (val), (i)) +#define sk_SRP_gN_cache_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN_cache, (st), (cmp)) +#define sk_SRP_gN_cache_dup(st) SKM_sk_dup(SRP_gN_cache, st) +#define sk_SRP_gN_cache_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN_cache, (st), (free_func)) +#define sk_SRP_gN_cache_shift(st) SKM_sk_shift(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_pop(st) SKM_sk_pop(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_sort(st) SKM_sk_sort(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_is_sorted(st) SKM_sk_is_sorted(SRP_gN_cache, (st)) + +#define sk_SRP_user_pwd_new(cmp) SKM_sk_new(SRP_user_pwd, (cmp)) +#define sk_SRP_user_pwd_new_null() SKM_sk_new_null(SRP_user_pwd) +#define sk_SRP_user_pwd_free(st) SKM_sk_free(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_num(st) SKM_sk_num(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_value(st, i) SKM_sk_value(SRP_user_pwd, (st), (i)) +#define sk_SRP_user_pwd_set(st, i, val) SKM_sk_set(SRP_user_pwd, (st), (i), (val)) +#define sk_SRP_user_pwd_zero(st) SKM_sk_zero(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_push(st, val) SKM_sk_push(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_unshift(st, val) SKM_sk_unshift(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_find(st, val) SKM_sk_find(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_find_ex(st, val) SKM_sk_find_ex(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_delete(st, i) SKM_sk_delete(SRP_user_pwd, (st), (i)) +#define sk_SRP_user_pwd_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_user_pwd, (st), (ptr)) +#define sk_SRP_user_pwd_insert(st, val, i) SKM_sk_insert(SRP_user_pwd, (st), (val), (i)) +#define sk_SRP_user_pwd_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_user_pwd, (st), (cmp)) +#define sk_SRP_user_pwd_dup(st) SKM_sk_dup(SRP_user_pwd, st) +#define sk_SRP_user_pwd_pop_free(st, free_func) SKM_sk_pop_free(SRP_user_pwd, (st), (free_func)) +#define sk_SRP_user_pwd_shift(st) SKM_sk_shift(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_pop(st) SKM_sk_pop(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_sort(st) SKM_sk_sort(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_is_sorted(st) SKM_sk_is_sorted(SRP_user_pwd, (st)) + +#define sk_SRTP_PROTECTION_PROFILE_new(cmp) SKM_sk_new(SRTP_PROTECTION_PROFILE, (cmp)) +#define sk_SRTP_PROTECTION_PROFILE_new_null() SKM_sk_new_null(SRTP_PROTECTION_PROFILE) +#define sk_SRTP_PROTECTION_PROFILE_free(st) SKM_sk_free(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_num(st) SKM_sk_num(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_value(st, i) SKM_sk_value(SRTP_PROTECTION_PROFILE, (st), (i)) +#define sk_SRTP_PROTECTION_PROFILE_set(st, i, val) SKM_sk_set(SRTP_PROTECTION_PROFILE, (st), (i), (val)) +#define sk_SRTP_PROTECTION_PROFILE_zero(st) SKM_sk_zero(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_push(st, val) SKM_sk_push(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_unshift(st, val) SKM_sk_unshift(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_find(st, val) SKM_sk_find(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_find_ex(st, val) SKM_sk_find_ex(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_delete(st, i) SKM_sk_delete(SRTP_PROTECTION_PROFILE, (st), (i)) +#define sk_SRTP_PROTECTION_PROFILE_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRTP_PROTECTION_PROFILE, (st), (ptr)) +#define sk_SRTP_PROTECTION_PROFILE_insert(st, val, i) SKM_sk_insert(SRTP_PROTECTION_PROFILE, (st), (val), (i)) +#define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRTP_PROTECTION_PROFILE, (st), (cmp)) +#define sk_SRTP_PROTECTION_PROFILE_dup(st) SKM_sk_dup(SRTP_PROTECTION_PROFILE, st) +#define sk_SRTP_PROTECTION_PROFILE_pop_free(st, free_func) SKM_sk_pop_free(SRTP_PROTECTION_PROFILE, (st), (free_func)) +#define sk_SRTP_PROTECTION_PROFILE_shift(st) SKM_sk_shift(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_pop(st) SKM_sk_pop(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_sort(st) SKM_sk_sort(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_is_sorted(st) SKM_sk_is_sorted(SRTP_PROTECTION_PROFILE, (st)) + #define sk_SSL_CIPHER_new(cmp) SKM_sk_new(SSL_CIPHER, (cmp)) #define sk_SSL_CIPHER_new_null() SKM_sk_new_null(SSL_CIPHER) #define sk_SSL_CIPHER_free(st) SKM_sk_free(SSL_CIPHER, (st)) @@ -2056,31 +2144,6 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_OPENSSL_STRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_STRING, (st)) -#define sk_OPENSSL_PSTRING_new(cmp) ((STACK_OF(OPENSSL_PSTRING) *)sk_new(CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) -#define sk_OPENSSL_PSTRING_new_null() ((STACK_OF(OPENSSL_PSTRING) *)sk_new_null()) -#define sk_OPENSSL_PSTRING_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i)) -#define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func)) -#define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i) -#define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_zero(st) SKM_sk_zero(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_unshift(st, val) sk_unshift(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_find_ex(st, val) sk_find_ex((_STACK *)CHECKED_CONST_PTR_OF(STACK_OF(OPENSSL_PSTRING), st), CHECKED_CONST_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_delete(st, i) SKM_sk_delete(OPENSSL_PSTRING, (st), (i)) -#define sk_OPENSSL_PSTRING_delete_ptr(st, ptr) (OPENSSL_PSTRING *)sk_delete_ptr(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, ptr)) -#define sk_OPENSSL_PSTRING_set_cmp_func(st, cmp) \ - ((int (*)(const OPENSSL_STRING * const *,const OPENSSL_STRING * const *)) \ - sk_set_cmp_func(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) -#define sk_OPENSSL_PSTRING_dup(st) SKM_sk_dup(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_shift(st) SKM_sk_shift(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_pop(st) (OPENSSL_STRING *)sk_pop(CHECKED_STACK_OF(OPENSSL_PSTRING, st)) -#define sk_OPENSSL_PSTRING_sort(st) SKM_sk_sort(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_PSTRING, (st)) - - #define sk_OPENSSL_BLOCK_new(cmp) ((STACK_OF(OPENSSL_BLOCK) *)sk_new(CHECKED_SK_CMP_FUNC(void, cmp))) #define sk_OPENSSL_BLOCK_new_null() ((STACK_OF(OPENSSL_BLOCK) *)sk_new_null()) #define sk_OPENSSL_BLOCK_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val)) @@ -2106,6 +2169,31 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_OPENSSL_BLOCK_is_sorted(st) SKM_sk_is_sorted(OPENSSL_BLOCK, (st)) +#define sk_OPENSSL_PSTRING_new(cmp) ((STACK_OF(OPENSSL_PSTRING) *)sk_new(CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) +#define sk_OPENSSL_PSTRING_new_null() ((STACK_OF(OPENSSL_PSTRING) *)sk_new_null()) +#define sk_OPENSSL_PSTRING_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i)) +#define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func)) +#define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i) +#define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_zero(st) SKM_sk_zero(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_unshift(st, val) sk_unshift(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_find_ex(st, val) sk_find_ex((_STACK *)CHECKED_CONST_PTR_OF(STACK_OF(OPENSSL_PSTRING), st), CHECKED_CONST_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_delete(st, i) SKM_sk_delete(OPENSSL_PSTRING, (st), (i)) +#define sk_OPENSSL_PSTRING_delete_ptr(st, ptr) (OPENSSL_PSTRING *)sk_delete_ptr(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, ptr)) +#define sk_OPENSSL_PSTRING_set_cmp_func(st, cmp) \ + ((int (*)(const OPENSSL_STRING * const *,const OPENSSL_STRING * const *)) \ + sk_set_cmp_func(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) +#define sk_OPENSSL_PSTRING_dup(st) SKM_sk_dup(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_shift(st) SKM_sk_shift(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_pop(st) (OPENSSL_STRING *)sk_pop(CHECKED_STACK_OF(OPENSSL_PSTRING, st)) +#define sk_OPENSSL_PSTRING_sort(st) SKM_sk_sort(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_PSTRING, (st)) + + #define d2i_ASN1_SET_OF_ACCESS_DESCRIPTION(st, pp, length, d2i_func, free_func, ex_tag, ex_class) \ SKM_ASN1_SET_OF_d2i(ACCESS_DESCRIPTION, (st), (pp), (length), (d2i_func), (free_func), (ex_tag), (ex_class)) #define i2d_ASN1_SET_OF_ACCESS_DESCRIPTION(st, pp, i2d_func, ex_tag, ex_class, is_set) \ diff --git a/app/openssl/crypto/symhacks.h b/app/openssl/crypto/symhacks.h index 3fd4a816..bd2f000d 100644 --- a/app/openssl/crypto/symhacks.h +++ b/app/openssl/crypto/symhacks.h @@ -176,7 +176,6 @@ #define SSL_CTX_set_default_passwd_cb_userdata SSL_CTX_set_def_passwd_cb_ud #undef SSL_COMP_get_compression_methods #define SSL_COMP_get_compression_methods SSL_COMP_get_compress_methods - #undef ssl_add_clienthello_renegotiate_ext #define ssl_add_clienthello_renegotiate_ext ssl_add_clienthello_reneg_ext #undef ssl_add_serverhello_renegotiate_ext @@ -185,6 +184,32 @@ #define ssl_parse_clienthello_renegotiate_ext ssl_parse_clienthello_reneg_ext #undef ssl_parse_serverhello_renegotiate_ext #define ssl_parse_serverhello_renegotiate_ext ssl_parse_serverhello_reneg_ext +#undef SSL_srp_server_param_with_username +#define SSL_srp_server_param_with_username SSL_srp_server_param_with_un +#undef SSL_CTX_set_srp_client_pwd_callback +#define SSL_CTX_set_srp_client_pwd_callback SSL_CTX_set_srp_client_pwd_cb +#undef SSL_CTX_set_srp_verify_param_callback +#define SSL_CTX_set_srp_verify_param_callback SSL_CTX_set_srp_vfy_param_cb +#undef SSL_CTX_set_srp_username_callback +#define SSL_CTX_set_srp_username_callback SSL_CTX_set_srp_un_cb +#undef ssl_add_clienthello_use_srtp_ext +#define ssl_add_clienthello_use_srtp_ext ssl_add_clihello_use_srtp_ext +#undef ssl_add_serverhello_use_srtp_ext +#define ssl_add_serverhello_use_srtp_ext ssl_add_serhello_use_srtp_ext +#undef ssl_parse_clienthello_use_srtp_ext +#define ssl_parse_clienthello_use_srtp_ext ssl_parse_clihello_use_srtp_ext +#undef ssl_parse_serverhello_use_srtp_ext +#define ssl_parse_serverhello_use_srtp_ext ssl_parse_serhello_use_srtp_ext +#undef SSL_CTX_set_next_protos_advertised_cb +#define SSL_CTX_set_next_protos_advertised_cb SSL_CTX_set_next_protos_adv_cb +#undef SSL_CTX_set_next_proto_select_cb +#define SSL_CTX_set_next_proto_select_cb SSL_CTX_set_next_proto_sel_cb +#undef ssl3_cbc_record_digest_supported +#define ssl3_cbc_record_digest_supported ssl3_cbc_record_digest_support +#undef ssl_check_clienthello_tlsext_late +#define ssl_check_clienthello_tlsext_late ssl_check_clihello_tlsext_late +#undef ssl_check_clienthello_tlsext_early +#define ssl_check_clienthello_tlsext_early ssl_check_clihello_tlsext_early /* Hack some long ENGINE names */ #undef ENGINE_get_default_BN_mod_exp_crt @@ -238,6 +263,9 @@ #define EC_GROUP_get_point_conversion_form EC_GROUP_get_point_conv_form #undef EC_GROUP_clear_free_all_extra_data #define EC_GROUP_clear_free_all_extra_data EC_GROUP_clr_free_all_xtra_data +#undef EC_KEY_set_public_key_affine_coordinates +#define EC_KEY_set_public_key_affine_coordinates \ + EC_KEY_set_pub_key_aff_coords #undef EC_POINT_set_Jprojective_coordinates_GFp #define EC_POINT_set_Jprojective_coordinates_GFp \ EC_POINT_set_Jproj_coords_GFp @@ -294,8 +322,6 @@ #define ec_GFp_simple_point_set_to_infinity ec_GFp_simple_pt_set_to_inf #undef ec_GFp_simple_points_make_affine #define ec_GFp_simple_points_make_affine ec_GFp_simple_pts_make_affine -#undef ec_GFp_simple_group_get_curve_GFp -#define ec_GFp_simple_group_get_curve_GFp ec_GFp_simple_grp_get_curve_GFp #undef ec_GFp_simple_set_Jprojective_coordinates_GFp #define ec_GFp_simple_set_Jprojective_coordinates_GFp \ ec_GFp_smp_set_Jproj_coords_GFp @@ -399,6 +425,12 @@ #undef dtls1_retransmit_buffered_messages #define dtls1_retransmit_buffered_messages dtls1_retransmit_buffered_msgs +/* Hack some long SRP names */ +#undef SRP_generate_server_master_secret +#define SRP_generate_server_master_secret SRP_gen_server_master_secret +#undef SRP_generate_client_master_secret +#define SRP_generate_client_master_secret SRP_gen_client_master_secret + /* Hack some long UI names */ #undef UI_method_get_prompt_constructor #define UI_method_get_prompt_constructor UI_method_get_prompt_constructr diff --git a/app/openssl/crypto/ui/ui.h b/app/openssl/crypto/ui/ui.h index 2b1cfa22..bd78aa41 100644 --- a/app/openssl/crypto/ui/ui.h +++ b/app/openssl/crypto/ui/ui.h @@ -316,7 +316,7 @@ int (*UI_method_get_writer(UI_METHOD *method))(UI*,UI_STRING*); int (*UI_method_get_flusher(UI_METHOD *method))(UI*); int (*UI_method_get_reader(UI_METHOD *method))(UI*,UI_STRING*); int (*UI_method_get_closer(UI_METHOD *method))(UI*); -char* (*UI_method_get_prompt_constructor(UI_METHOD *method))(UI*, const char*, const char*); +char * (*UI_method_get_prompt_constructor(UI_METHOD *method))(UI*, const char*, const char*); /* The following functions are helpers for method writers to access relevant data from a UI_STRING. */ diff --git a/app/openssl/crypto/ui/ui_openssl.c b/app/openssl/crypto/ui/ui_openssl.c index b05cbf34..e6ccd34a 100644 --- a/app/openssl/crypto/ui/ui_openssl.c +++ b/app/openssl/crypto/ui/ui_openssl.c @@ -122,9 +122,15 @@ * sigaction and fileno included. -pedantic would be more appropriate for * the intended purposes, but we can't prevent users from adding -ansi. */ +#if defined(OPENSSL_SYSNAME_VXWORKS) +#include +#endif + +#if !defined(_POSIX_C_SOURCE) && defined(OPENSSL_SYS_VMS) #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 2 #endif +#endif #include #include #include diff --git a/app/openssl/crypto/x509/by_dir.c b/app/openssl/crypto/x509/by_dir.c index 27ca5150..c6602dae 100644 --- a/app/openssl/crypto/x509/by_dir.c +++ b/app/openssl/crypto/x509/by_dir.c @@ -218,7 +218,7 @@ static int add_cert_dir(BY_DIR *ctx, const char *dir, int type) s=dir; p=s; - for (;;p++) + do { if ((*p == LIST_SEPARATOR_CHAR) || (*p == '\0')) { @@ -264,9 +264,7 @@ static int add_cert_dir(BY_DIR *ctx, const char *dir, int type) return 0; } } - if (*p == '\0') - break; - } + } while (*p++ != '\0'); return 1; } diff --git a/app/openssl/crypto/x509/x509.h b/app/openssl/crypto/x509/x509.h index e6f8a403..092dd745 100644 --- a/app/openssl/crypto/x509/x509.h +++ b/app/openssl/crypto/x509/x509.h @@ -657,11 +657,15 @@ int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey); int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki); +int X509_signature_dump(BIO *bp,const ASN1_STRING *sig, int indent); int X509_signature_print(BIO *bp,X509_ALGOR *alg, ASN1_STRING *sig); int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_pubkey_digest(const X509 *data,const EVP_MD *type, @@ -763,6 +767,7 @@ X509_ALGOR *X509_ALGOR_dup(X509_ALGOR *xn); int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, void *pval); void X509_ALGOR_get0(ASN1_OBJECT **paobj, int *pptype, void **ppval, X509_ALGOR *algor); +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); X509_NAME *X509_NAME_dup(X509_NAME *xn); X509_NAME_ENTRY *X509_NAME_ENTRY_dup(X509_NAME_ENTRY *ne); @@ -896,6 +901,9 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *algor1, int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, ASN1_BIT_STRING *signature, void *data, EVP_PKEY *pkey, const EVP_MD *type); +int ASN1_item_sign_ctx(const ASN1_ITEM *it, + X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, void *asn, EVP_MD_CTX *ctx); #endif int X509_set_version(X509 *x,long version); @@ -1161,6 +1169,9 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, unsigned char *salt, int saltlen, unsigned char *aiv, int prf_nid); +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen); + /* PKCS#8 utilities */ DECLARE_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO) diff --git a/app/openssl/crypto/x509/x509_cmp.c b/app/openssl/crypto/x509/x509_cmp.c index 4bc9da07..352aa374 100644 --- a/app/openssl/crypto/x509/x509_cmp.c +++ b/app/openssl/crypto/x509/x509_cmp.c @@ -86,16 +86,20 @@ unsigned long X509_issuer_and_serial_hash(X509 *a) EVP_MD_CTX_init(&ctx); f=X509_NAME_oneline(a->cert_info->issuer,NULL,0); - ret=strlen(f); - EVP_DigestInit_ex(&ctx, EVP_md5(), NULL); - EVP_DigestUpdate(&ctx,(unsigned char *)f,ret); + if (!EVP_DigestInit_ex(&ctx, EVP_md5(), NULL)) + goto err; + if (!EVP_DigestUpdate(&ctx,(unsigned char *)f,strlen(f))) + goto err; OPENSSL_free(f); - EVP_DigestUpdate(&ctx,(unsigned char *)a->cert_info->serialNumber->data, - (unsigned long)a->cert_info->serialNumber->length); - EVP_DigestFinal_ex(&ctx,&(md[0]),NULL); + if(!EVP_DigestUpdate(&ctx,(unsigned char *)a->cert_info->serialNumber->data, + (unsigned long)a->cert_info->serialNumber->length)) + goto err; + if (!EVP_DigestFinal_ex(&ctx,&(md[0]),NULL)) + goto err; ret=( ((unsigned long)md[0] )|((unsigned long)md[1]<<8L)| ((unsigned long)md[2]<<16L)|((unsigned long)md[3]<<24L) )&0xffffffffL; + err: EVP_MD_CTX_cleanup(&ctx); return(ret); } @@ -219,7 +223,9 @@ unsigned long X509_NAME_hash(X509_NAME *x) /* Make sure X509_NAME structure contains valid cached encoding */ i2d_X509_NAME(x,NULL); - EVP_Digest(x->canon_enc, x->canon_enclen, md, NULL, EVP_sha1(), NULL); + if (!EVP_Digest(x->canon_enc, x->canon_enclen, md, NULL, EVP_sha1(), + NULL)) + return 0; ret=( ((unsigned long)md[0] )|((unsigned long)md[1]<<8L)| ((unsigned long)md[2]<<16L)|((unsigned long)md[3]<<24L) @@ -234,16 +240,22 @@ unsigned long X509_NAME_hash(X509_NAME *x) unsigned long X509_NAME_hash_old(X509_NAME *x) { + EVP_MD_CTX md_ctx; unsigned long ret=0; unsigned char md[16]; /* Make sure X509_NAME structure contains valid cached encoding */ i2d_X509_NAME(x,NULL); - EVP_Digest(x->bytes->data, x->bytes->length, md, NULL, EVP_md5(), NULL); + EVP_MD_CTX_init(&md_ctx); + EVP_MD_CTX_set_flags(&md_ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + if (EVP_DigestInit_ex(&md_ctx, EVP_md5(), NULL) + && EVP_DigestUpdate(&md_ctx, x->bytes->data, x->bytes->length) + && EVP_DigestFinal_ex(&md_ctx,md,NULL)) + ret=(((unsigned long)md[0] )|((unsigned long)md[1]<<8L)| + ((unsigned long)md[2]<<16L)|((unsigned long)md[3]<<24L) + )&0xffffffffL; + EVP_MD_CTX_cleanup(&md_ctx); - ret=( ((unsigned long)md[0] )|((unsigned long)md[1]<<8L)| - ((unsigned long)md[2]<<16L)|((unsigned long)md[3]<<24L) - )&0xffffffffL; return(ret); } #endif diff --git a/app/openssl/crypto/x509/x509_lu.c b/app/openssl/crypto/x509/x509_lu.c index 3a6e04a1..38525a8c 100644 --- a/app/openssl/crypto/x509/x509_lu.c +++ b/app/openssl/crypto/x509/x509_lu.c @@ -87,7 +87,7 @@ void X509_LOOKUP_free(X509_LOOKUP *ctx) if (ctx == NULL) return; if ( (ctx->method != NULL) && (ctx->method->free != NULL)) - ctx->method->free(ctx); + (*ctx->method->free)(ctx); OPENSSL_free(ctx); } diff --git a/app/openssl/crypto/x509/x509_vfy.c b/app/openssl/crypto/x509/x509_vfy.c index 5a0b0249..920066ae 100644 --- a/app/openssl/crypto/x509/x509_vfy.c +++ b/app/openssl/crypto/x509/x509_vfy.c @@ -153,7 +153,6 @@ static int x509_subject_cmp(X509 **a, X509 **b) int X509_verify_cert(X509_STORE_CTX *ctx) { X509 *x,*xtmp,*chain_ss=NULL; - X509_NAME *xn; int bad_chain = 0; X509_VERIFY_PARAM *param = ctx->param; int depth,i,ok=0; @@ -205,7 +204,6 @@ int X509_verify_cert(X509_STORE_CTX *ctx) */ /* If we are self signed, we break */ - xn=X509_get_issuer_name(x); if (ctx->check_issued(ctx, x,x)) break; /* If we were passed a cert chain, use it first */ @@ -242,7 +240,6 @@ int X509_verify_cert(X509_STORE_CTX *ctx) i=sk_X509_num(ctx->chain); x=sk_X509_value(ctx->chain,i-1); - xn = X509_get_subject_name(x); if (ctx->check_issued(ctx, x, x)) { /* we have a self signed certificate */ @@ -291,7 +288,6 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (depth < num) break; /* If we are self signed, we break */ - xn=X509_get_issuer_name(x); if (ctx->check_issued(ctx,x,x)) break; ok = ctx->get_issuer(&xtmp, ctx, x); @@ -310,7 +306,6 @@ int X509_verify_cert(X509_STORE_CTX *ctx) } /* we now have our chain, lets check it... */ - xn=X509_get_issuer_name(x); /* Is last certificate looked up self signed? */ if (!ctx->check_issued(ctx,x,x)) @@ -699,6 +694,7 @@ static int check_cert(X509_STORE_CTX *ctx) X509_CRL *crl = NULL, *dcrl = NULL; X509 *x; int ok, cnum; + unsigned int last_reasons; cnum = ctx->error_depth; x = sk_X509_value(ctx->chain, cnum); ctx->current_cert = x; @@ -707,6 +703,7 @@ static int check_cert(X509_STORE_CTX *ctx) ctx->current_reasons = 0; while (ctx->current_reasons != CRLDP_ALL_REASONS) { + last_reasons = ctx->current_reasons; /* Try to retrieve relevant CRL */ if (ctx->get_crl) ok = ctx->get_crl(ctx, &crl, x); @@ -750,6 +747,15 @@ static int check_cert(X509_STORE_CTX *ctx) X509_CRL_free(dcrl); crl = NULL; dcrl = NULL; + /* If reasons not updated we wont get anywhere by + * another iteration, so exit loop. + */ + if (last_reasons == ctx->current_reasons) + { + ctx->error = X509_V_ERR_UNABLE_TO_GET_CRL; + ok = ctx->verify_cb(0, ctx); + goto err; + } } err: X509_CRL_free(crl); @@ -877,7 +883,7 @@ static int crl_extension_match(X509_CRL *a, X509_CRL *b, int nid) { ASN1_OCTET_STRING *exta, *extb; int i; - i = X509_CRL_get_ext_by_NID(a, nid, 0); + i = X509_CRL_get_ext_by_NID(a, nid, -1); if (i >= 0) { /* Can't have multiple occurrences */ @@ -888,7 +894,7 @@ static int crl_extension_match(X509_CRL *a, X509_CRL *b, int nid) else exta = NULL; - i = X509_CRL_get_ext_by_NID(b, nid, 0); + i = X509_CRL_get_ext_by_NID(b, nid, -1); if (i >= 0) { @@ -1456,10 +1462,9 @@ static int cert_crl(X509_STORE_CTX *ctx, X509_CRL *crl, X509 *x) * a certificate was revoked. This has since been changed since * critical extension can change the meaning of CRL entries. */ - if (crl->flags & EXFLAG_CRITICAL) + if (!(ctx->param->flags & X509_V_FLAG_IGNORE_CRITICAL) + && (crl->flags & EXFLAG_CRITICAL)) { - if (ctx->param->flags & X509_V_FLAG_IGNORE_CRITICAL) - return 1; ctx->error = X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION; ok = ctx->verify_cb(0, ctx); if(!ok) @@ -1732,7 +1737,7 @@ int X509_cmp_time(const ASN1_TIME *ctm, time_t *cmp_time) atm.length=sizeof(buff2); atm.data=(unsigned char *)buff2; - if (X509_time_adj(&atm,-offset*60, cmp_time) == NULL) + if (X509_time_adj(&atm, offset*60, cmp_time) == NULL) return 0; if (ctm->type == V_ASN1_UTCTIME) diff --git a/app/openssl/crypto/x509/x509type.c b/app/openssl/crypto/x509/x509type.c index 3385ad3f..9702ec53 100644 --- a/app/openssl/crypto/x509/x509type.c +++ b/app/openssl/crypto/x509/x509type.c @@ -100,20 +100,26 @@ int X509_certificate_type(X509 *x, EVP_PKEY *pkey) break; } - i=X509_get_signature_type(x); - switch (i) + i=OBJ_obj2nid(x->sig_alg->algorithm); + if (i && OBJ_find_sigid_algs(i, NULL, &i)) { - case EVP_PKEY_RSA: - ret|=EVP_PKS_RSA; - break; - case EVP_PKEY_DSA: - ret|=EVP_PKS_DSA; - break; - case EVP_PKEY_EC: - ret|=EVP_PKS_EC; - break; - default: - break; + + switch (i) + { + case NID_rsaEncryption: + case NID_rsa: + ret|=EVP_PKS_RSA; + break; + case NID_dsa: + case NID_dsa_2: + ret|=EVP_PKS_DSA; + break; + case NID_X9_62_id_ecPublicKey: + ret|=EVP_PKS_EC; + break; + default: + break; + } } if (EVP_PKEY_size(pk) <= 1024/8)/* /8 because it's 1024 bits we look diff --git a/app/openssl/crypto/x509/x_all.c b/app/openssl/crypto/x509/x_all.c index 8ec88c21..e06602d6 100644 --- a/app/openssl/crypto/x509/x_all.c +++ b/app/openssl/crypto/x509/x_all.c @@ -95,12 +95,26 @@ int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md) x->sig_alg, x->signature, x->cert_info,pkey,md)); } +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx) + { + x->cert_info->enc.modified = 1; + return ASN1_item_sign_ctx(ASN1_ITEM_rptr(X509_CINF), + x->cert_info->signature, + x->sig_alg, x->signature, x->cert_info, ctx); + } + int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md) { return(ASN1_item_sign(ASN1_ITEM_rptr(X509_REQ_INFO),x->sig_alg, NULL, x->signature, x->req_info,pkey,md)); } +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx) + { + return ASN1_item_sign_ctx(ASN1_ITEM_rptr(X509_REQ_INFO), + x->sig_alg, NULL, x->signature, x->req_info, ctx); + } + int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md) { x->crl->enc.modified = 1; @@ -108,6 +122,13 @@ int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md) x->sig_alg, x->signature, x->crl,pkey,md)); } +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx) + { + x->crl->enc.modified = 1; + return ASN1_item_sign_ctx(ASN1_ITEM_rptr(X509_CRL_INFO), + x->crl->sig_alg, x->sig_alg, x->signature, x->crl, ctx); + } + int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md) { return(ASN1_item_sign(ASN1_ITEM_rptr(NETSCAPE_SPKAC), x->sig_algor,NULL, diff --git a/app/openssl/crypto/x509v3/v3_addr.c b/app/openssl/crypto/x509v3/v3_addr.c index 0d70e869..df46a498 100644 --- a/app/openssl/crypto/x509v3/v3_addr.c +++ b/app/openssl/crypto/x509v3/v3_addr.c @@ -142,12 +142,13 @@ unsigned int v3_addr_get_afi(const IPAddressFamily *f) * Expand the bitstring form of an address into a raw byte array. * At the moment this is coded for simplicity, not speed. */ -static void addr_expand(unsigned char *addr, +static int addr_expand(unsigned char *addr, const ASN1_BIT_STRING *bs, const int length, const unsigned char fill) { - OPENSSL_assert(bs->length >= 0 && bs->length <= length); + if (bs->length < 0 || bs->length > length) + return 0; if (bs->length > 0) { memcpy(addr, bs->data, bs->length); if ((bs->flags & 7) != 0) { @@ -159,6 +160,7 @@ static void addr_expand(unsigned char *addr, } } memset(addr + bs->length, fill, length - bs->length); + return 1; } /* @@ -181,15 +183,13 @@ static int i2r_address(BIO *out, return 0; switch (afi) { case IANA_AFI_IPV4: - if (bs->length > 4) + if (!addr_expand(addr, bs, 4, fill)) return 0; - addr_expand(addr, bs, 4, fill); BIO_printf(out, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); break; case IANA_AFI_IPV6: - if (bs->length > 16) + if (!addr_expand(addr, bs, 16, fill)) return 0; - addr_expand(addr, bs, 16, fill); for (n = 16; n > 1 && addr[n-1] == 0x00 && addr[n-2] == 0x00; n -= 2) ; for (i = 0; i < n; i += 2) @@ -315,6 +315,12 @@ static int i2r_IPAddrBlocks(const X509V3_EXT_METHOD *method, /* * Sort comparison function for a sequence of IPAddressOrRange * elements. + * + * There's no sane answer we can give if addr_expand() fails, and an + * assertion failure on externally supplied data is seriously uncool, + * so we just arbitrarily declare that if given invalid inputs this + * function returns -1. If this messes up your preferred sort order + * for garbage input, tough noogies. */ static int IPAddressOrRange_cmp(const IPAddressOrRange *a, const IPAddressOrRange *b, @@ -326,22 +332,26 @@ static int IPAddressOrRange_cmp(const IPAddressOrRange *a, switch (a->type) { case IPAddressOrRange_addressPrefix: - addr_expand(addr_a, a->u.addressPrefix, length, 0x00); + if (!addr_expand(addr_a, a->u.addressPrefix, length, 0x00)) + return -1; prefixlen_a = addr_prefixlen(a->u.addressPrefix); break; case IPAddressOrRange_addressRange: - addr_expand(addr_a, a->u.addressRange->min, length, 0x00); + if (!addr_expand(addr_a, a->u.addressRange->min, length, 0x00)) + return -1; prefixlen_a = length * 8; break; } switch (b->type) { case IPAddressOrRange_addressPrefix: - addr_expand(addr_b, b->u.addressPrefix, length, 0x00); + if (!addr_expand(addr_b, b->u.addressPrefix, length, 0x00)) + return -1; prefixlen_b = addr_prefixlen(b->u.addressPrefix); break; case IPAddressOrRange_addressRange: - addr_expand(addr_b, b->u.addressRange->min, length, 0x00); + if (!addr_expand(addr_b, b->u.addressRange->min, length, 0x00)) + return -1; prefixlen_b = length * 8; break; } @@ -383,6 +393,7 @@ static int range_should_be_prefix(const unsigned char *min, unsigned char mask; int i, j; + OPENSSL_assert(memcmp(min, max, length) <= 0); for (i = 0; i < length && min[i] == max[i]; i++) ; for (j = length - 1; j >= 0 && min[j] == 0x00 && max[j] == 0xFF; j--) @@ -601,10 +612,10 @@ static IPAddressOrRanges *make_prefix_or_range(IPAddrBlocks *addr, return NULL; switch (afi) { case IANA_AFI_IPV4: - sk_IPAddressOrRange_set_cmp_func(aors, v4IPAddressOrRange_cmp); + (void) sk_IPAddressOrRange_set_cmp_func(aors, v4IPAddressOrRange_cmp); break; case IANA_AFI_IPV6: - sk_IPAddressOrRange_set_cmp_func(aors, v6IPAddressOrRange_cmp); + (void) sk_IPAddressOrRange_set_cmp_func(aors, v6IPAddressOrRange_cmp); break; } f->ipAddressChoice->type = IPAddressChoice_addressesOrRanges; @@ -656,22 +667,22 @@ int v3_addr_add_range(IPAddrBlocks *addr, /* * Extract min and max values from an IPAddressOrRange. */ -static void extract_min_max(IPAddressOrRange *aor, +static int extract_min_max(IPAddressOrRange *aor, unsigned char *min, unsigned char *max, int length) { - OPENSSL_assert(aor != NULL && min != NULL && max != NULL); + if (aor == NULL || min == NULL || max == NULL) + return 0; switch (aor->type) { case IPAddressOrRange_addressPrefix: - addr_expand(min, aor->u.addressPrefix, length, 0x00); - addr_expand(max, aor->u.addressPrefix, length, 0xFF); - return; + return (addr_expand(min, aor->u.addressPrefix, length, 0x00) && + addr_expand(max, aor->u.addressPrefix, length, 0xFF)); case IPAddressOrRange_addressRange: - addr_expand(min, aor->u.addressRange->min, length, 0x00); - addr_expand(max, aor->u.addressRange->max, length, 0xFF); - return; + return (addr_expand(min, aor->u.addressRange->min, length, 0x00) && + addr_expand(max, aor->u.addressRange->max, length, 0xFF)); } + return 0; } /* @@ -687,9 +698,10 @@ int v3_addr_get_range(IPAddressOrRange *aor, if (aor == NULL || min == NULL || max == NULL || afi_length == 0 || length < afi_length || (aor->type != IPAddressOrRange_addressPrefix && - aor->type != IPAddressOrRange_addressRange)) + aor->type != IPAddressOrRange_addressRange) || + !extract_min_max(aor, min, max, afi_length)) return 0; - extract_min_max(aor, min, max, afi_length); + return afi_length; } @@ -771,8 +783,9 @@ int v3_addr_is_canonical(IPAddrBlocks *addr) IPAddressOrRange *a = sk_IPAddressOrRange_value(aors, j); IPAddressOrRange *b = sk_IPAddressOrRange_value(aors, j + 1); - extract_min_max(a, a_min, a_max, length); - extract_min_max(b, b_min, b_max, length); + if (!extract_min_max(a, a_min, a_max, length) || + !extract_min_max(b, b_min, b_max, length)) + return 0; /* * Punt misordered list, overlapping start, or inverted range. @@ -800,14 +813,17 @@ int v3_addr_is_canonical(IPAddrBlocks *addr) } /* - * Check final range to see if it should be a prefix. + * Check range to see if it's inverted or should be a + * prefix. */ j = sk_IPAddressOrRange_num(aors) - 1; { IPAddressOrRange *a = sk_IPAddressOrRange_value(aors, j); - if (a->type == IPAddressOrRange_addressRange) { - extract_min_max(a, a_min, a_max, length); - if (range_should_be_prefix(a_min, a_max, length) >= 0) + if (a != NULL && a->type == IPAddressOrRange_addressRange) { + if (!extract_min_max(a, a_min, a_max, length)) + return 0; + if (memcmp(a_min, a_max, length) > 0 || + range_should_be_prefix(a_min, a_max, length) >= 0) return 0; } } @@ -841,8 +857,16 @@ static int IPAddressOrRanges_canonize(IPAddressOrRanges *aors, unsigned char a_min[ADDR_RAW_BUF_LEN], a_max[ADDR_RAW_BUF_LEN]; unsigned char b_min[ADDR_RAW_BUF_LEN], b_max[ADDR_RAW_BUF_LEN]; - extract_min_max(a, a_min, a_max, length); - extract_min_max(b, b_min, b_max, length); + if (!extract_min_max(a, a_min, a_max, length) || + !extract_min_max(b, b_min, b_max, length)) + return 0; + + /* + * Punt inverted ranges. + */ + if (memcmp(a_min, a_max, length) > 0 || + memcmp(b_min, b_max, length) > 0) + return 0; /* * Punt overlaps. @@ -860,8 +884,8 @@ static int IPAddressOrRanges_canonize(IPAddressOrRanges *aors, IPAddressOrRange *merged; if (!make_addressRange(&merged, a_min, b_max, length)) return 0; - sk_IPAddressOrRange_set(aors, i, merged); - sk_IPAddressOrRange_delete(aors, i + 1); + (void) sk_IPAddressOrRange_set(aors, i, merged); + (void) sk_IPAddressOrRange_delete(aors, i + 1); IPAddressOrRange_free(a); IPAddressOrRange_free(b); --i; @@ -869,6 +893,20 @@ static int IPAddressOrRanges_canonize(IPAddressOrRanges *aors, } } + /* + * Check for inverted final range. + */ + j = sk_IPAddressOrRange_num(aors) - 1; + { + IPAddressOrRange *a = sk_IPAddressOrRange_value(aors, j); + if (a != NULL && a->type == IPAddressOrRange_addressRange) { + unsigned char a_min[ADDR_RAW_BUF_LEN], a_max[ADDR_RAW_BUF_LEN]; + extract_min_max(a, a_min, a_max, length); + if (memcmp(a_min, a_max, length) > 0) + return 0; + } + } + return 1; } @@ -885,7 +923,7 @@ int v3_addr_canonize(IPAddrBlocks *addr) v3_addr_get_afi(f))) return 0; } - sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp); + (void) sk_IPAddressFamily_set_cmp_func(addr, IPAddressFamily_cmp); sk_IPAddressFamily_sort(addr); OPENSSL_assert(v3_addr_is_canonical(addr)); return 1; @@ -1017,6 +1055,11 @@ static void *v2i_IPAddrBlocks(const struct v3_ext_method *method, X509V3_conf_err(val); goto err; } + if (memcmp(min, max, length_from_afi(afi)) > 0) { + X509V3err(X509V3_F_V2I_IPADDRBLOCKS, X509V3_R_EXTENSION_VALUE_ERROR); + X509V3_conf_err(val); + goto err; + } if (!v3_addr_add_range(addr, afi, safi, min, max)) { X509V3err(X509V3_F_V2I_IPADDRBLOCKS, ERR_R_MALLOC_FAILURE); goto err; @@ -1102,13 +1145,15 @@ static int addr_contains(IPAddressOrRanges *parent, p = 0; for (c = 0; c < sk_IPAddressOrRange_num(child); c++) { - extract_min_max(sk_IPAddressOrRange_value(child, c), - c_min, c_max, length); + if (!extract_min_max(sk_IPAddressOrRange_value(child, c), + c_min, c_max, length)) + return -1; for (;; p++) { if (p >= sk_IPAddressOrRange_num(parent)) return 0; - extract_min_max(sk_IPAddressOrRange_value(parent, p), - p_min, p_max, length); + if (!extract_min_max(sk_IPAddressOrRange_value(parent, p), + p_min, p_max, length)) + return 0; if (memcmp(p_max, c_max, length) < 0) continue; if (memcmp(p_min, c_min, length) > 0) @@ -1130,7 +1175,7 @@ int v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b) return 1; if (b == NULL || v3_addr_inherits(a) || v3_addr_inherits(b)) return 0; - sk_IPAddressFamily_set_cmp_func(b, IPAddressFamily_cmp); + (void) sk_IPAddressFamily_set_cmp_func(b, IPAddressFamily_cmp); for (i = 0; i < sk_IPAddressFamily_num(a); i++) { IPAddressFamily *fa = sk_IPAddressFamily_value(a, i); int j = sk_IPAddressFamily_find(b, fa); @@ -1195,7 +1240,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx, } if (!v3_addr_is_canonical(ext)) validation_err(X509_V_ERR_INVALID_EXTENSION); - sk_IPAddressFamily_set_cmp_func(ext, IPAddressFamily_cmp); + (void) sk_IPAddressFamily_set_cmp_func(ext, IPAddressFamily_cmp); if ((child = sk_IPAddressFamily_dup(ext)) == NULL) { X509V3err(X509V3_F_V3_ADDR_VALIDATE_PATH_INTERNAL, ERR_R_MALLOC_FAILURE); ret = 0; @@ -1221,7 +1266,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx, } continue; } - sk_IPAddressFamily_set_cmp_func(x->rfc3779_addr, IPAddressFamily_cmp); + (void) sk_IPAddressFamily_set_cmp_func(x->rfc3779_addr, IPAddressFamily_cmp); for (j = 0; j < sk_IPAddressFamily_num(child); j++) { IPAddressFamily *fc = sk_IPAddressFamily_value(child, j); int k = sk_IPAddressFamily_find(x->rfc3779_addr, fc); diff --git a/app/openssl/crypto/x509v3/v3_asid.c b/app/openssl/crypto/x509v3/v3_asid.c index 3f434c06..1587e8ed 100644 --- a/app/openssl/crypto/x509v3/v3_asid.c +++ b/app/openssl/crypto/x509v3/v3_asid.c @@ -358,6 +358,20 @@ static int ASIdentifierChoice_is_canonical(ASIdentifierChoice *choice) goto done; } + /* + * Check for inverted range. + */ + i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1; + { + ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i); + ASN1_INTEGER *a_min, *a_max; + if (a != NULL && a->type == ASIdOrRange_range) { + extract_min_max(a, &a_min, &a_max); + if (ASN1_INTEGER_cmp(a_min, a_max) > 0) + goto done; + } + } + ret = 1; done: @@ -392,9 +406,18 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice) return 1; /* - * We have a list. Sort it. + * If not a list, or if empty list, it's broken. + */ + if (choice->type != ASIdentifierChoice_asIdsOrRanges || + sk_ASIdOrRange_num(choice->u.asIdsOrRanges) == 0) { + X509V3err(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE, + X509V3_R_EXTENSION_VALUE_ERROR); + return 0; + } + + /* + * We have a non-empty list. Sort it. */ - OPENSSL_assert(choice->type == ASIdentifierChoice_asIdsOrRanges); sk_ASIdOrRange_sort(choice->u.asIdsOrRanges); /* @@ -414,6 +437,13 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice) */ OPENSSL_assert(ASN1_INTEGER_cmp(a_min, b_min) <= 0); + /* + * Punt inverted ranges. + */ + if (ASN1_INTEGER_cmp(a_min, a_max) > 0 || + ASN1_INTEGER_cmp(b_min, b_max) > 0) + goto done; + /* * Check for overlaps. */ @@ -465,12 +495,26 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice) break; } ASIdOrRange_free(b); - sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1); + (void) sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1); i--; continue; } } + /* + * Check for final inverted range. + */ + i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1; + { + ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i); + ASN1_INTEGER *a_min, *a_max; + if (a != NULL && a->type == ASIdOrRange_range) { + extract_min_max(a, &a_min, &a_max); + if (ASN1_INTEGER_cmp(a_min, a_max) > 0) + goto done; + } + } + OPENSSL_assert(ASIdentifierChoice_is_canonical(choice)); /* Paranoia */ ret = 1; @@ -498,6 +542,7 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method, struct v3_ext_ctx *ctx, STACK_OF(CONF_VALUE) *values) { + ASN1_INTEGER *min = NULL, *max = NULL; ASIdentifiers *asid = NULL; int i; @@ -508,7 +553,6 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method, for (i = 0; i < sk_CONF_VALUE_num(values); i++) { CONF_VALUE *val = sk_CONF_VALUE_value(values, i); - ASN1_INTEGER *min = NULL, *max = NULL; int i1, i2, i3, is_range, which; /* @@ -578,18 +622,19 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method, max = s2i_ASN1_INTEGER(NULL, s + i2); OPENSSL_free(s); if (min == NULL || max == NULL) { - ASN1_INTEGER_free(min); - ASN1_INTEGER_free(max); X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); goto err; } + if (ASN1_INTEGER_cmp(min, max) > 0) { + X509V3err(X509V3_F_V2I_ASIDENTIFIERS, X509V3_R_EXTENSION_VALUE_ERROR); + goto err; + } } if (!v3_asid_add_id_or_range(asid, which, min, max)) { - ASN1_INTEGER_free(min); - ASN1_INTEGER_free(max); X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); goto err; } + min = max = NULL; } /* @@ -601,6 +646,8 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method, err: ASIdentifiers_free(asid); + ASN1_INTEGER_free(min); + ASN1_INTEGER_free(max); return NULL; } diff --git a/app/openssl/crypto/x509v3/v3_pci.c b/app/openssl/crypto/x509v3/v3_pci.c index 0dcfa004..f7b733ae 100644 --- a/app/openssl/crypto/x509v3/v3_pci.c +++ b/app/openssl/crypto/x509v3/v3_pci.c @@ -2,7 +2,7 @@ /* Contributed to the OpenSSL Project 2004 * by Richard Levitte (richard@levitte.org) */ -/* Copyright (c) 2004 Kungliga Tekniska Högskolan +/* Copyright (c) 2004 Kungliga Tekniska Högskolan * (Royal Institute of Technology, Stockholm, Sweden). * All rights reserved. * diff --git a/app/openssl/crypto/x509v3/v3_pcia.c b/app/openssl/crypto/x509v3/v3_pcia.c index bb362e0e..eb082739 100644 --- a/app/openssl/crypto/x509v3/v3_pcia.c +++ b/app/openssl/crypto/x509v3/v3_pcia.c @@ -2,7 +2,7 @@ /* Contributed to the OpenSSL Project 2004 * by Richard Levitte (richard@levitte.org) */ -/* Copyright (c) 2004 Kungliga Tekniska Högskolan +/* Copyright (c) 2004 Kungliga Tekniska Högskolan * (Royal Institute of Technology, Stockholm, Sweden). * All rights reserved. * diff --git a/app/openssl/crypto/x509v3/v3_purp.c b/app/openssl/crypto/x509v3/v3_purp.c index 181bd349..ad688657 100644 --- a/app/openssl/crypto/x509v3/v3_purp.c +++ b/app/openssl/crypto/x509v3/v3_purp.c @@ -474,11 +474,11 @@ static void x509v3_cache_extensions(X509 *x) for (i = 0; i < X509_get_ext_count(x); i++) { ex = X509_get_ext(x, i); - if (!X509_EXTENSION_get_critical(ex)) - continue; if (OBJ_obj2nid(X509_EXTENSION_get_object(ex)) == NID_freshest_crl) x->ex_flags |= EXFLAG_FRESHEST; + if (!X509_EXTENSION_get_critical(ex)) + continue; if (!X509_supported_extension(ex)) { x->ex_flags |= EXFLAG_CRITICAL; diff --git a/app/openssl/crypto/x509v3/v3_skey.c b/app/openssl/crypto/x509v3/v3_skey.c index 202c9e48..0a984fba 100644 --- a/app/openssl/crypto/x509v3/v3_skey.c +++ b/app/openssl/crypto/x509v3/v3_skey.c @@ -129,7 +129,8 @@ static ASN1_OCTET_STRING *s2i_skey_id(X509V3_EXT_METHOD *method, goto err; } - EVP_Digest(pk->data, pk->length, pkey_dig, &diglen, EVP_sha1(), NULL); + if (!EVP_Digest(pk->data, pk->length, pkey_dig, &diglen, EVP_sha1(), NULL)) + goto err; if(!M_ASN1_OCTET_STRING_set(oct, pkey_dig, diglen)) { X509V3err(X509V3_F_S2I_SKEY_ID,ERR_R_MALLOC_FAILURE); diff --git a/app/openssl/crypto/x509v3/v3_utl.c b/app/openssl/crypto/x509v3/v3_utl.c index e0302345..87cfceb4 100644 --- a/app/openssl/crypto/x509v3/v3_utl.c +++ b/app/openssl/crypto/x509v3/v3_utl.c @@ -365,7 +365,7 @@ char *hex_to_string(const unsigned char *buffer, long len) char *tmp, *q; const unsigned char *p; int i; - const static char hexdig[] = "0123456789ABCDEF"; + static const char hexdig[] = "0123456789ABCDEF"; if(!buffer || !len) return NULL; if(!(tmp = OPENSSL_malloc(len * 3 + 1))) { X509V3err(X509V3_F_HEX_TO_STRING,ERR_R_MALLOC_FAILURE); diff --git a/app/openssl/crypto/x86_64cpuid.S b/app/openssl/crypto/x86_64cpuid.S new file mode 100644 index 00000000..562b03ab --- /dev/null +++ b/app/openssl/crypto/x86_64cpuid.S @@ -0,0 +1,234 @@ + +.hidden OPENSSL_cpuid_setup +.section .init + call OPENSSL_cpuid_setup + +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,8,4 + +.text + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: + movl (%rdi),%eax +.Lspin: leaq (%rsi,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rdi) + jne .Lspin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: + movq %rbx,%r8 + + xorl %eax,%eax + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz .Lintel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz .Lintel + + + movl $2147483648,%eax + cpuid + cmpl $2147483649,%eax + jb .Lintel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb .Lintel + + movl $2147483656,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc .Lgeneric + shrl $16,%ebx + cmpb %r10b,%bl + ja .Lgeneric + andl $4026531839,%edx + jmp .Lgeneric + +.Lintel: + cmpl $4,%r11d + movl $-1,%r10d + jb .Lnocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + +.Lnocacheinfo: + movl $1,%eax + cpuid + andl $3220176895,%edx + cmpl $0,%r9d + jne .Lnotintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .Lnotintel + orl $1048576,%edx +.Lnotintel: + btl $28,%edx + jnc .Lgeneric + andl $4026531839,%edx + cmpl $0,%r10d + je .Lgeneric + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .Lgeneric + andl $4026531839,%edx +.Lgeneric: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + movl $4026525695,%eax + andl %eax,%r9d +.Ldone: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx + orq %r9,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: + xorq %rax,%rax + cmpq $15,%rsi + jae .Lot + cmpq $0,%rsi + je .Lret +.Little: + movb %al,(%rdi) + subq $1,%rsi + leaq 1(%rdi),%rdi + jnz .Little +.Lret: + .byte 0xf3,0xc3 +.align 16 +.Lot: + testq $7,%rdi + jz .Laligned + movb %al,(%rdi) + leaq -1(%rsi),%rsi + leaq 1(%rdi),%rdi + jmp .Lot +.Laligned: + movq %rax,(%rdi) + leaq -8(%rsi),%rsi + testq $-8,%rsi + leaq 8(%rdi),%rdi + jnz .Laligned + cmpq $0,%rsi + jne .Little + .byte 0xf3,0xc3 +.size OPENSSL_cleanse,.-OPENSSL_cleanse +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: + movl $8,%ecx +.Loop_rdrand: +.byte 72,15,199,240 + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand diff --git a/app/openssl/crypto/x86_64cpuid.pl b/app/openssl/crypto/x86_64cpuid.pl index c96821a3..6ebfd017 100644 --- a/app/openssl/crypto/x86_64cpuid.pl +++ b/app/openssl/crypto/x86_64cpuid.pl @@ -7,15 +7,25 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order -if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } -else { $arg1="%rdi"; $arg2="%rsi"; } print<<___; .extern OPENSSL_cpuid_setup +.hidden OPENSSL_cpuid_setup .section .init call OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,8,4 + .text .globl OPENSSL_atomic_add @@ -46,7 +56,7 @@ OPENSSL_rdtsc: .type OPENSSL_ia32_cpuid,\@abi-omnipotent .align 16 OPENSSL_ia32_cpuid: - mov %rbx,%r8 + mov %rbx,%r8 # save %rbx xor %eax,%eax cpuid @@ -78,7 +88,15 @@ OPENSSL_ia32_cpuid: # AMD specific mov \$0x80000000,%eax cpuid - cmp \$0x80000008,%eax + cmp \$0x80000001,%eax + jb .Lintel + mov %eax,%r10d + mov \$0x80000001,%eax + cpuid + or %ecx,%r9d + and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 + + cmp \$0x80000008,%r10d jb .Lintel mov \$0x80000008,%eax @@ -89,12 +107,12 @@ OPENSSL_ia32_cpuid: mov \$1,%eax cpuid bt \$28,%edx # test hyper-threading bit - jnc .Ldone + jnc .Lgeneric shr \$16,%ebx # number of logical processors cmp %r10b,%bl - ja .Ldone + ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) - jmp .Ldone + jmp .Lgeneric .Lintel: cmp \$4,%r11d @@ -111,30 +129,47 @@ OPENSSL_ia32_cpuid: .Lnocacheinfo: mov \$1,%eax cpuid + and \$0xbfefffff,%edx # force reserved bits to 0 cmp \$0,%r9d jne .Lnotintel - or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR + or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs and \$15,%ah cmp \$15,%ah # examine Family ID - je .Lnotintel - or \$0x40000000,%edx # use reserved bit to skip unrolled loop + jne .Lnotintel + or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR .Lnotintel: bt \$28,%edx # test hyper-threading bit - jnc .Ldone + jnc .Lgeneric and \$0xefffffff,%edx # ~(1<<28) cmp \$0,%r10d - je .Ldone + je .Lgeneric or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp \$1,%bl # see if cache is shared - ja .Ldone + ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) +.Lgeneric: + and \$0x00000800,%r9d # isolate AMD XOP flag + and \$0xfffff7ff,%ecx + or %ecx,%r9d # merge AMD XOP flag + + mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx + bt \$27,%r9d # check OSXSAVE bit + jnc .Lclear_avx + xor %ecx,%ecx # XCR0 + .byte 0x0f,0x01,0xd0 # xgetbv + and \$6,%eax # isolate XMM and YMM state support + cmp \$6,%eax + je .Ldone +.Lclear_avx: + mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) + and %eax,%r9d # clear AVX, FMA and AMD XOP bits .Ldone: - shl \$32,%rcx - mov %edx,%eax - mov %r8,%rbx - or %rcx,%rax + shl \$32,%r9 + mov %r10d,%eax + mov %r8,%rbx # restore %rbx + or %r9,%rax ret .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid @@ -229,4 +264,21 @@ OPENSSL_wipe_cpu: .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu ___ +print<<___; +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,\@abi-omnipotent +.align 16 +OPENSSL_ia32_rdrand: + mov \$8,%ecx +.Loop_rdrand: + rdrand %rax + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmp \$0,%rax + cmove %rcx,%rax + ret +.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand +___ + close STDOUT; # flush diff --git a/app/openssl/crypto/x86cpuid.S b/app/openssl/crypto/x86cpuid.S new file mode 100644 index 00000000..87a46d4b --- /dev/null +++ b/app/openssl/crypto/x86cpuid.S @@ -0,0 +1,346 @@ +.file "x86cpuid.s" +.text +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: +.L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc .L000nocpuid + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz .L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz .L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb .L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb .L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc .L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja .L002generic + andl $4026531839,%edx + jmp .L002generic +.L001intel: + cmpl $4,%edi + movl $-1,%edi + jb .L003nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +.L003nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne .L004notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .L004notintel + orl $1048576,%edx +.L004notintel: + btl $28,%edx + jnc .L002generic + andl $4026531839,%edx + cmpl $0,%edi + je .L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .L002generic + andl $4026531839,%edx +.L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc .L005clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je .L006done + cmpl $2,%eax + je .L005clear_avx +.L007clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +.L005clear_avx: + andl $4026525695,%ebp +.L006done: + movl %esi,%eax + movl %ebp,%edx +.L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: +.L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + call .L008PIC_me_up +.L008PIC_me_up: + popl %ecx + leal _GLOBAL_OFFSET_TABLE_+[.-.L008PIC_me_up](%ecx),%ecx + movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx + btl $4,(%ecx) + jnc .L009notsc + .byte 0x0f,0x31 +.L009notsc: + ret +.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin +.globl OPENSSL_instrument_halt +.type OPENSSL_instrument_halt,@function +.align 16 +OPENSSL_instrument_halt: +.L_OPENSSL_instrument_halt_begin: + call .L010PIC_me_up +.L010PIC_me_up: + popl %ecx + leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%ecx),%ecx + movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx + btl $4,(%ecx) + jnc .L011nohalt +.long 2421723150 + andl $3,%eax + jnz .L011nohalt + pushfl + popl %eax + btl $9,%eax + jnc .L011nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +.L011nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin +.globl OPENSSL_far_spin +.type OPENSSL_far_spin,@function +.align 16 +OPENSSL_far_spin: +.L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc .L012nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp .L013spin +.align 16 +.L013spin: + incl %eax + cmpl (%ecx),%edx + je .L013spin +.long 529567888 + ret +.L012nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: +.L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + call .L014PIC_me_up +.L014PIC_me_up: + popl %ecx + leal _GLOBAL_OFFSET_TABLE_+[.-.L014PIC_me_up](%ecx),%ecx + movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc .L015no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne .L016no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +.L016no_sse2: +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +.L015no_x87: + leal 4(%esp),%eax + ret +.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: +.L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +.L017spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne .L017spin + movl %ebx,%eax + popl %ebx + ret +.size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin +.globl OPENSSL_indirect_call +.type OPENSSL_indirect_call,@function +.align 16 +OPENSSL_indirect_call: +.L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: +.L_OPENSSL_cleanse_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax + cmpl $7,%ecx + jae .L018lot + cmpl $0,%ecx + je .L019ret +.L020little: + movb %al,(%edx) + subl $1,%ecx + leal 1(%edx),%edx + jnz .L020little +.L019ret: + ret +.align 16 +.L018lot: + testl $3,%edx + jz .L021aligned + movb %al,(%edx) + leal -1(%ecx),%ecx + leal 1(%edx),%edx + jmp .L018lot +.L021aligned: + movl %eax,(%edx) + leal -4(%ecx),%ecx + testl $-4,%ecx + leal 4(%edx),%edx + jnz .L021aligned + cmpl $0,%ecx + jne .L020little + ret +.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: +.L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +.L022loop: +.byte 15,199,240 + jc .L023break + loop .L022loop +.L023break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.comm OPENSSL_ia32cap_P,8,4 +.section .init + call OPENSSL_cpuid_setup diff --git a/app/openssl/crypto/x86cpuid.pl b/app/openssl/crypto/x86cpuid.pl index a7464af1..b270b443 100644 --- a/app/openssl/crypto/x86cpuid.pl +++ b/app/openssl/crypto/x86cpuid.pl @@ -19,9 +19,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &pushf (); &pop ("eax"); &xor ("ecx","eax"); - &bt ("ecx",21); - &jnc (&label("done")); &xor ("eax","eax"); + &bt ("ecx",21); + &jnc (&label("nocpuid")); &cpuid (); &mov ("edi","eax"); # max value for standard query level @@ -51,7 +51,14 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } # AMD specific &mov ("eax",0x80000000); &cpuid (); - &cmp ("eax",0x80000008); + &cmp ("eax",0x80000001); + &jb (&label("intel")); + &mov ("esi","eax"); + &mov ("eax",0x80000001); + &cpuid (); + &or ("ebp","ecx"); + &and ("ebp",1<<11|1); # isolate XOP bit + &cmp ("esi",0x80000008); &jb (&label("intel")); &mov ("eax",0x80000008); @@ -60,15 +67,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &inc ("esi"); # number of cores &mov ("eax",1); + &xor ("ecx","ecx"); &cpuid (); &bt ("edx",28); - &jnc (&label("done")); + &jnc (&label("generic")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); - &ja (&label("done")); + &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit - &jmp (&label("done")); + &jmp (&label("generic")); &set_label("intel"); &cmp ("edi",4); @@ -84,28 +92,53 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &set_label("nocacheinfo"); &mov ("eax",1); + &xor ("ecx","ecx"); &cpuid (); + &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 &cmp ("ebp",0); - &jne (&label("notP4")); + &jne (&label("notintel")); + &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs &and (&HB("eax"),15); # familiy ID &cmp (&HB("eax"),15); # P4? - &jne (&label("notP4")); - &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR -&set_label("notP4"); + &jne (&label("notintel")); + &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR +&set_label("notintel"); &bt ("edx",28); # test hyper-threading bit - &jnc (&label("done")); + &jnc (&label("generic")); &and ("edx",0xefffffff); &cmp ("edi",0); - &je (&label("done")); + &je (&label("generic")); &or ("edx",0x10000000); &shr ("ebx",16); &cmp (&LB("ebx"),1); - &ja (&label("done")); + &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit if not + +&set_label("generic"); + &and ("ebp",1<<11); # isolate AMD XOP flag + &and ("ecx",0xfffff7ff); # force 11th bit to 0 + &mov ("esi","edx"); + &or ("ebp","ecx"); # merge AMD XOP flag + + &bt ("ecx",27); # check OSXSAVE bit + &jnc (&label("clear_avx")); + &xor ("ecx","ecx"); + &data_byte(0x0f,0x01,0xd0); # xgetbv + &and ("eax",6); + &cmp ("eax",6); + &je (&label("done")); + &cmp ("eax",2); + &je (&label("clear_avx")); +&set_label("clear_xmm"); + &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits + &and ("esi",0xfeffffff); # clear FXSR +&set_label("clear_avx"); + &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits &set_label("done"); - &mov ("eax","edx"); - &mov ("edx","ecx"); + &mov ("eax","esi"); + &mov ("edx","ebp"); +&set_label("nocpuid"); &function_end("OPENSSL_ia32_cpuid"); &external_label("OPENSSL_ia32cap_P"); @@ -134,7 +167,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jnz (&label("nohalt")); # not enough privileges &pushf (); - &pop ("eax") + &pop ("eax"); &bt ("eax",9); &jnc (&label("nohalt")); # interrupts are disabled @@ -199,8 +232,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &bt (&DWP(0,"ecx"),1); &jnc (&label("no_x87")); if ($sse2) { - &bt (&DWP(0,"ecx"),26); - &jnc (&label("no_sse2")); + &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits + &cmp ("ecx",1<<26|1<<24); + &jne (&label("no_sse2")); &pxor ("xmm0","xmm0"); &pxor ("xmm1","xmm1"); &pxor ("xmm2","xmm2"); @@ -248,7 +282,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } # arguments is 1 or 2! &function_begin_B("OPENSSL_indirect_call"); { - my $i,$max=7; # $max has to be chosen as 4*n-1 + my ($max,$i)=(7,); # $max has to be chosen as 4*n-1 # in order to preserve eventual # stack alignment &push ("ebp"); @@ -307,6 +341,18 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &ret (); &function_end_B("OPENSSL_cleanse"); +&function_begin_B("OPENSSL_ia32_rdrand"); + &mov ("ecx",8); +&set_label("loop"); + &rdrand ("eax"); + &jc (&label("break")); + &loop (&label("loop")); +&set_label("break"); + &cmp ("eax",0); + &cmove ("eax","ecx"); + &ret (); +&function_end_B("OPENSSL_ia32_rdrand"); + &initseg("OPENSSL_cpuid_setup"); &asm_finish(); diff --git a/app/openssl/e_os.h b/app/openssl/e_os.h index 5ceeeeb9..6a0aad1d 100644 --- a/app/openssl/e_os.h +++ b/app/openssl/e_os.h @@ -99,7 +99,6 @@ extern "C" { # ifndef MAC_OS_GUSI_SOURCE # define MAC_OS_pre_X # define NO_SYS_TYPES_H - typedef long ssize_t; # endif # define NO_SYS_PARAM_H # define NO_CHMOD @@ -340,8 +339,6 @@ static unsigned int _strlen31(const char *str) # define OPENSSL_NO_POSIX_IO # endif -# define ssize_t long - # if defined (__BORLANDC__) # define _setmode setmode # define _O_TEXT O_TEXT @@ -371,6 +368,13 @@ static unsigned int _strlen31(const char *str) # define DEFAULT_HOME "C:" # endif +/* Avoid Windows 8 SDK GetVersion deprecated problems */ +#if defined(_MSC_VER) && _MSC_VER>=1800 +# define check_winnt() (1) +#else +# define check_winnt() (GetVersion() < 0x80000000) +#endif + #else /* The non-microsoft world */ # ifdef OPENSSL_SYS_VMS @@ -456,9 +460,6 @@ static unsigned int _strlen31(const char *str) * (unless when compiling with -D_POSIX_SOURCE, * which doesn't work for us) */ # endif -# if defined(NeXT) || defined(OPENSSL_SYS_NEWS4) || defined(OPENSSL_SYS_SUNOS) -# define ssize_t int /* ditto */ -# endif # ifdef OPENSSL_SYS_NEWS4 /* setvbuf is missing on mips-sony-bsd */ # define setvbuf(a, b, c, d) setbuffer((a), (b), (d)) typedef unsigned long clock_t; @@ -637,12 +638,6 @@ static unsigned int _strlen31(const char *str) #endif -#if defined(__ultrix) -# ifndef ssize_t -# define ssize_t int -# endif -#endif - #if defined(sun) && !defined(__svr4__) && !defined(__SVR4) /* include headers first, so our defines don't break it */ #include diff --git a/app/openssl/e_os2.h b/app/openssl/e_os2.h index 4c785c62..d22c0368 100644 --- a/app/openssl/e_os2.h +++ b/app/openssl/e_os2.h @@ -193,8 +193,14 @@ extern "C" { #endif /* --------------------------------- VOS ----------------------------------- */ -#ifdef OPENSSL_SYSNAME_VOS +#if defined(__VOS__) || defined(OPENSSL_SYSNAME_VOS) # define OPENSSL_SYS_VOS +#ifdef __HPPA__ +# define OPENSSL_SYS_VOS_HPPA +#endif +#ifdef __IA32__ +# define OPENSSL_SYS_VOS_IA32 +#endif #endif /* ------------------------------- VxWorks --------------------------------- */ @@ -283,6 +289,26 @@ extern "C" { # define OPENSSL_GLOBAL_REF(name) _shadow_##name #endif +#if defined(OPENSSL_SYS_MACINTOSH_CLASSIC) && macintosh==1 && !defined(MAC_OS_GUSI_SOURCE) +# define ossl_ssize_t long +#endif + +#ifdef OPENSSL_SYS_MSDOS +# define ossl_ssize_t long +#endif + +#if defined(NeXT) || defined(OPENSSL_SYS_NEWS4) || defined(OPENSSL_SYS_SUNOS) +# define ssize_t int +#endif + +#if defined(__ultrix) && !defined(ssize_t) +# define ossl_ssize_t int +#endif + +#ifndef ossl_ssize_t +# define ossl_ssize_t ssize_t +#endif + #ifdef __cplusplus } #endif diff --git a/app/openssl/import_openssl.sh b/app/openssl/import_openssl.sh index 6f601989..02d2ab1c 100755 --- a/app/openssl/import_openssl.sh +++ b/app/openssl/import_openssl.sh @@ -27,6 +27,14 @@ set -e trap "echo WARNING: Exiting on non-zero subprocess exit code" ERR; +# Make sure we're in the right directory. +cd $(dirname $0) + +# Ensure consistent sorting order / tool output. +export LANG=C +export LC_ALL=C +PERL_EXE="perl -C0" + function die() { declare -r message=$1 @@ -56,7 +64,7 @@ function main() { die "openssl.version not found" fi - source openssl.version + source ./openssl.version if [ "$OPENSSL_VERSION" == "" ]; then die "Invalid openssl.version; see README.android for more information" fi @@ -68,7 +76,7 @@ function main() { die "openssl.config not found" fi - source openssl.config + source ./openssl.config if [ "$CONFIGURE_ARGS" == "" -o "$UNNEEDED_SOURCES" == "" -o "$NEEDED_SOURCES" == "" ]; then die "Invalid openssl.config; see README.android for more information" fi @@ -83,7 +91,7 @@ function main() { declare -r patch=$1 shift || usage "No patch file specified." [ -d $OPENSSL_DIR ] || usage "$OPENSSL_DIR not found, did you mean to use generate?" - [ -d $OPENSSL_DIR_ORIG_ORIG ] || usage "$OPENSSL_DIR_ORIG not found, did you mean to use generate?" + [ -d $OPENSSL_DIR_ORIG ] || usage "$OPENSSL_DIR_ORIG not found, did you mean to use generate?" regenerate $patch elif [ "$command" = "generate" ]; then declare -r patch=$1 @@ -96,26 +104,330 @@ function main() { fi } +# Compute the name of an assembly source file generated by one of the +# gen_asm_xxxx() functions below. The logic is the following: +# - if "$2" is not empty, output it directly +# - otherwise, change the file extension of $1 from .pl to .S and output +# it. +# Usage: default_asm_file "$1" "$2" +# or default_asm_file "$@" +# +# $1: generator path (perl script) +# $2: optional output file name. +function default_asm_file () { + if [ "$2" ]; then + echo "$2" + else + echo "${1%%.pl}.S" + fi +} + +# Generate an ARM assembly file. +# $1: generator (perl script) +# $2: [optional] output file name +function gen_asm_arm () { + local OUT + OUT=$(default_asm_file "$@") + $PERL_EXE "$1" > "$OUT" +} + +function gen_asm_mips () { + local OUT + OUT=$(default_asm_file "$@") + # The perl scripts expect to run the target compiler as $CC to determine + # the endianess of the target. Setting CC to true is a hack that forces the scripts + # to generate little endian output + CC=true $PERL_EXE "$1" o32 > "$OUT" +} + +function gen_asm_x86 () { + local OUT + OUT=$(default_asm_file "$@") + $PERL_EXE "$1" elf -fPIC $(print_values_with_prefix -D $OPENSSL_CRYPTO_DEFINES_x86) > "$OUT" +} + +function gen_asm_x86_64 () { + local OUT + OUT=$(default_asm_file "$@") + $PERL_EXE "$1" elf "$OUT" > "$OUT" +} + + +# Filter all items in a list that match a given pattern. +# $1: space-separated list +# $2: egrep pattern. +# Out: items in $1 that match $2 +function filter_by_egrep() { + declare -r pattern=$1 + shift + echo "$@" | tr ' ' '\n' | grep -e "$pattern" | tr '\n' ' ' +} + +# Sort and remove duplicates in a space-separated list +# $1: space-separated list +# Out: new space-separated list +function uniq_sort () { + echo "$@" | tr ' ' '\n' | sort -u | tr '\n' ' ' +} + +function print_autogenerated_header() { + echo "# Auto-generated - DO NOT EDIT!" + echo "# To regenerate, edit openssl.config, then run:" + echo "# ./import_openssl.sh import /path/to/openssl-$OPENSSL_VERSION.tar.gz" + echo "#" +} + +# Run Configure and generate headers +# $1: 32 for 32-bit arch, 64 for 64-bit arch, trusty for Trusty +# $2: 1 if building for static version +# Out: returns the cflags and depflags in variable $flags +function generate_build_config_headers() { + chmod +x ./Configure + local configure_args_bits=CONFIGURE_ARGS_$1 + local configure_args_stat='' + local outname=$1 + if [[ $2 == 1 ]] ; then + configure_args_stat=CONFIGURE_ARGS_STATIC + outname="static-$1" + fi + + if [[ $1 == trusty ]] ; then + PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS_TRUSTY + else + PERL=/usr/bin/perl ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat} + fi + + rm -f apps/CA.pl.bak crypto/opensslconf.h.bak + mv -f crypto/opensslconf.h crypto/opensslconf-$outname.h + cp -f crypto/opensslconf-$outname.h include/openssl/opensslconf-$outname.h + + local tmpfile=$(mktemp tmp.XXXXXXXXXX) + (grep -e -D Makefile | grep -v CONFIGURE_ARGS= | grep -v OPTIONS= | \ + grep -v -e -DOPENSSL_NO_DEPRECATED) > $tmpfile + declare -r cflags=$(filter_by_egrep "^-D" $(grep -e "^CFLAG=" $tmpfile)) + declare -r depflags=$(filter_by_egrep "^-D" $(grep -e "^DEPFLAG=" $tmpfile)) + rm -f $tmpfile + + flags="$cflags $depflags" +} + +# Run Configure and generate makefiles +function generate_build_config_mk() { + chmod +x ./Configure + for bits in 32 64 trusty; do + # Header flags are output in $flags, first static, then dynamic + generate_build_config_headers $bits 1 + local flags_static=$flags + generate_build_config_headers $bits + + echo "Generating build-config-$bits.mk" + ( + print_autogenerated_header + + echo "openssl_cflags_$bits := \\" + for flag in $flags ; do echo " $flag \\" ; done + echo "" + + echo "openssl_cflags_static_$bits := \\" + for flag in $flags_static; do echo " $flag \\" ; done + echo "" + ) > ../build-config-$bits.mk + done +} + +# Generate crypto/opensslconf.h file including arch-specific files +function generate_opensslconf_h() { + echo "Generating opensslconf.h" + ( + echo "// Auto-generated - DO NOT EDIT!" + echo "#ifndef OPENSSL_SYS_TRUSTY" + echo "#if defined(__LP64__)" + echo "#include \"opensslconf-64.h\"" + echo "#else" + echo "#include \"opensslconf-32.h\"" + echo "#endif" + echo "#else" + echo "#include \"opensslconf-trusty.h\"" + echo "#endif" + ) > crypto/opensslconf.h + # Generate a compatible version for the static library builds + echo "Generating opensslconf-static.h" + ( + echo "// Auto-generated - DO NOT EDIT!" + echo "#if defined(__LP64__)" + echo "#include \"opensslconf-static-64.h\"" + echo "#else" + echo "#include \"opensslconf-static-32.h\"" + echo "#endif" + ) > crypto/opensslconf-static.h + # move it to output include files as well + cp -f crypto/opensslconf-static.h include/openssl/opensslconf-static.h +} + +# Return the value of a computed variable name. +# E.g.: +# FOO=foo +# BAR=bar +# echo $(var_value FOO_$BAR) -> prints the value of ${FOO_bar} +# $1: Variable name +# Out: variable value +var_value() { + # Note: don't use 'echo' here, because it's sensitive to values + # that begin with an underscore (e.g. "-n") + eval printf \"%s\\n\" \$$1 +} + +# Same as var_value, but returns sorted output without duplicates. +# $1: Variable name +# Out: variable value (if space-separated list, sorted with no duplicates) +var_sorted_value() { + uniq_sort $(var_value $1) +} + +# Print the values in a list with a prefix +# $1: prefix to use +# $2+: values of list +print_values_with_prefix() { + declare -r prefix=$1 + shift + for src; do + echo -n " $prefix$src " + done +} + +# Print the definition of a given variable in a GNU Make build file. +# $1: Variable name (e.g. common_src_files) +# $2: prefix for each variable contents +# $3+: Variable value (e.g. list of sources) +print_vardef_with_prefix_in_mk() { + declare -r varname=$1 + declare -r prefix=$2 + shift + shift + if [ -z "$1" ]; then + echo "$varname :=" + else + echo "$varname := \\" + for src; do + echo " $prefix$src \\" + done + fi + echo "" +} +# Print the definition of a given variable in a GNU Make build file. +# $1: Variable name (e.g. common_src_files) +# $2+: Variable value (e.g. list of sources) +print_vardef_in_mk() { + declare -r varname=$1 + shift + print_vardef_with_prefix_in_mk $varname "" $@ +} + +# Same as print_vardef_in_mk, but print a CFLAGS definition from +# a list of compiler defines. +# $1: Variable name (e.g. common_cflags) +# $2: List of defines (e.g. OPENSSL_NO_CAMELLIA ...) +print_defines_in_mk() { + declare -r varname=$1 + shift + if [ -z "$1" ]; then + echo "$varname :=" + else + echo "$varname := \\" + for def; do + echo " -D$def \\" + done + fi + echo "" +} + +# Generate a configuration file like Crypto-config.mk +# This uses variable definitions from openssl.config to build a config +# file that can compute the list of target- and host-specific sources / +# compiler flags for a given component. +# +# $1: Target file name. (e.g. Crypto-config.mk) +# $2: Variable prefix. (e.g. CRYPTO) +# $3: "host" or "target" +function generate_config_mk() { + declare -r output="$1" + declare -r prefix="$2" + declare -r all_archs="arm arm64 x86 x86_64 mips" + + echo "Generating $(basename $output)" + ( + print_autogenerated_header + echo \ +"# This script will append to the following variables: +# +# LOCAL_CFLAGS +# LOCAL_C_INCLUDES +# LOCAL_SRC_FILES_\$(TARGET_ARCH) +# LOCAL_SRC_FILES_\$(TARGET_2ND_ARCH) +# LOCAL_CFLAGS_\$(TARGET_ARCH) +# LOCAL_CFLAGS_\$(TARGET_2ND_ARCH) +# LOCAL_ADDITIONAL_DEPENDENCIES + + +LOCAL_ADDITIONAL_DEPENDENCIES += \$(LOCAL_PATH)/$(basename $output) +" + + common_defines=$(var_sorted_value OPENSSL_${prefix}_DEFINES) + print_defines_in_mk common_cflags $common_defines + + common_sources=$(var_sorted_value OPENSSL_${prefix}_SOURCES) + print_vardef_in_mk common_src_files $common_sources + + common_includes=$(var_sorted_value OPENSSL_${prefix}_INCLUDES) + print_vardef_with_prefix_in_mk common_c_includes external/openssl/ $common_includes + + for arch in $all_archs; do + arch_defines=$(var_sorted_value OPENSSL_${prefix}_DEFINES_${arch}) + print_defines_in_mk ${arch}_cflags $arch_defines + + arch_sources=$(var_sorted_value OPENSSL_${prefix}_SOURCES_${arch}) + print_vardef_in_mk ${arch}_src_files $arch_sources + + arch_exclude_sources=$(var_sorted_value OPENSSL_${prefix}_SOURCES_EXCLUDES_${arch}) + print_vardef_in_mk ${arch}_exclude_files $arch_exclude_sources + + done + + if [ $3 == "target" ]; then + echo " +LOCAL_CFLAGS += \$(common_cflags) +LOCAL_C_INCLUDES += \$(common_c_includes)" + for arch in $all_archs; do + echo " +LOCAL_SRC_FILES_${arch} += \$(filter-out \$(${arch}_exclude_files),\$(common_src_files) \$(${arch}_src_files)) +LOCAL_CFLAGS_${arch} += \$(${arch}_cflags)" + done + else + echo " +LOCAL_CFLAGS += \$(common_cflags) +LOCAL_C_INCLUDES += \$(common_c_includes) \$(local_c_includes) + +ifeq (\$(HOST_OS),linux) +LOCAL_CFLAGS_x86 += \$(x86_cflags) +LOCAL_SRC_FILES_x86 += \$(filter-out \$(x86_exclude_files), \$(common_src_files) \$(x86_src_files)) +LOCAL_CFLAGS_x86_64 += \$(x86_64_cflags) +LOCAL_SRC_FILES_x86_64 += \$(filter-out \$(x86_64_exclude_files), \$(common_src_files) \$(x86_64_src_files)) +else +\$(warning Unknown host OS \$(HOST_OS)) +LOCAL_SRC_FILES += \$(common_src_files) +endif" + fi + ) > "$output" +} + function import() { declare -r OPENSSL_SOURCE=$1 - untar $OPENSSL_SOURCE readonly applypatches $OPENSSL_DIR cd $OPENSSL_DIR - - # Configure source (and print Makefile defines for review, see README.android) - ./Configure $CONFIGURE_ARGS - rm -f apps/CA.pl.bak crypto/opensslconf.h.bak - echo - echo BEGIN Makefile defines to compare with android-config.mk - echo - grep -e -D Makefile | grep -v CONFIGURE_ARGS= | grep -v OPTIONS= | grep -v -e -DOPENSSL_NO_DEPRECATED - echo - echo END Makefile defines to compare with android-config.mk - echo - - # TODO(): Fixup android-config.mk + generate_build_config_mk + generate_opensslconf_h cp -f LICENSE ../NOTICE touch ../MODULE_LICENSE_BSD_LIKE @@ -129,17 +441,59 @@ function import() { fi done - # Copy Makefiles - cp ../patches/apps_Android.mk apps/Android.mk - cp ../patches/crypto_Android.mk crypto/Android.mk - cp ../patches/ssl_Android.mk ssl/Android.mk - - # Generate asm - perl crypto/aes/asm/aes-armv4.pl > crypto/aes/asm/aes-armv4.s - perl crypto/bn/asm/armv4-mont.pl > crypto/bn/asm/armv4-mont.s - perl crypto/sha/asm/sha1-armv4-large.pl > crypto/sha/asm/sha1-armv4-large.s - perl crypto/sha/asm/sha256-armv4.pl > crypto/sha/asm/sha256-armv4.s - perl crypto/sha/asm/sha512-armv4.pl > crypto/sha/asm/sha512-armv4.s + # Generate arm asm + gen_asm_arm crypto/aes/asm/aes-armv4.pl + gen_asm_arm crypto/aes/asm/bsaes-armv7.pl + gen_asm_arm crypto/bn/asm/armv4-gf2m.pl + gen_asm_arm crypto/bn/asm/armv4-mont.pl + gen_asm_arm crypto/modes/asm/ghash-armv4.pl + gen_asm_arm crypto/sha/asm/sha1-armv4-large.pl + gen_asm_arm crypto/sha/asm/sha256-armv4.pl + gen_asm_arm crypto/sha/asm/sha512-armv4.pl + + # Generate mips asm + gen_asm_mips crypto/aes/asm/aes-mips.pl + gen_asm_mips crypto/bn/asm/mips.pl crypto/bn/asm/bn-mips.S + gen_asm_mips crypto/bn/asm/mips-mont.pl + gen_asm_mips crypto/sha/asm/sha1-mips.pl + gen_asm_mips crypto/sha/asm/sha512-mips.pl crypto/sha/asm/sha256-mips.S + + # Generate x86 asm + gen_asm_x86 crypto/x86cpuid.pl + gen_asm_x86 crypto/aes/asm/aes-586.pl + gen_asm_x86 crypto/aes/asm/vpaes-x86.pl + gen_asm_x86 crypto/aes/asm/aesni-x86.pl + gen_asm_x86 crypto/bn/asm/bn-586.pl + gen_asm_x86 crypto/bn/asm/co-586.pl + gen_asm_x86 crypto/bn/asm/x86-mont.pl + gen_asm_x86 crypto/bn/asm/x86-gf2m.pl + gen_asm_x86 crypto/modes/asm/ghash-x86.pl + gen_asm_x86 crypto/sha/asm/sha1-586.pl + gen_asm_x86 crypto/sha/asm/sha256-586.pl + gen_asm_x86 crypto/sha/asm/sha512-586.pl + gen_asm_x86 crypto/md5/asm/md5-586.pl + gen_asm_x86 crypto/des/asm/des-586.pl + gen_asm_x86 crypto/des/asm/crypt586.pl + gen_asm_x86 crypto/bf/asm/bf-586.pl + + # Generate x86_64 asm + gen_asm_x86_64 crypto/x86_64cpuid.pl + gen_asm_x86_64 crypto/sha/asm/sha1-x86_64.pl + gen_asm_x86_64 crypto/sha/asm/sha512-x86_64.pl crypto/sha/asm/sha256-x86_64.S + gen_asm_x86_64 crypto/sha/asm/sha512-x86_64.pl + gen_asm_x86_64 crypto/modes/asm/ghash-x86_64.pl + gen_asm_x86_64 crypto/aes/asm/aesni-x86_64.pl + gen_asm_x86_64 crypto/aes/asm/vpaes-x86_64.pl + gen_asm_x86_64 crypto/aes/asm/bsaes-x86_64.pl + gen_asm_x86_64 crypto/aes/asm/aes-x86_64.pl + gen_asm_x86_64 crypto/aes/asm/aesni-sha1-x86_64.pl + gen_asm_x86_64 crypto/md5/asm/md5-x86_64.pl + gen_asm_x86_64 crypto/bn/asm/modexp512-x86_64.pl + gen_asm_x86_64 crypto/bn/asm/x86_64-mont.pl + gen_asm_x86_64 crypto/bn/asm/x86_64-gf2m.pl + gen_asm_x86_64 crypto/bn/asm/x86_64-mont5.pl + gen_asm_x86_64 crypto/rc4/asm/rc4-x86_64.pl + gen_asm_x86_64 crypto/rc4/asm/rc4-md5-x86_64.pl # Setup android.testssl directory mkdir android.testssl @@ -159,6 +513,14 @@ function import() { cd .. + generate_config_mk Crypto-config-target.mk CRYPTO target + generate_config_mk Crypto-config-host.mk CRYPTO host + generate_config_mk Crypto-config-trusty.mk CRYPTO_TRUSTY target + generate_config_mk Ssl-config-target.mk SSL target + generate_config_mk Ssl-config-host.mk SSL host + generate_config_mk Apps-config-target.mk APPS target + generate_config_mk Apps-config-host.mk APPS host + # Prune unnecessary sources prune @@ -196,6 +558,24 @@ function generate() { cleantar } +# Find all files in a sub-directory that are encoded in ISO-8859 +# $1: Directory. +# Out: list of files in $1 that are encoded as ISO-8859. +function find_iso8859_files() { + find $1 -type f -print0 | xargs -0 file --mime-encoding | grep -i "iso-8859" | cut -d: -f1 +} + +# Convert all ISO-8859 files in a given subdirectory to UTF-8 +# $1: Directory name +function convert_iso8859_to_utf8() { + declare -r iso_files=$(find_iso8859_files "$1") + for iso_file in $iso_files; do + iconv --from-code iso-8859-1 --to-code utf-8 $iso_file > $iso_file.tmp + rm -f $iso_file + mv $iso_file.tmp $iso_file + done +} + function untar() { declare -r OPENSSL_SOURCE=$1 declare -r readonly=$2 @@ -205,11 +585,11 @@ function untar() { # Process new source tar -zxf $OPENSSL_SOURCE - mv $OPENSSL_DIR $OPENSSL_DIR_ORIG + convert_iso8859_to_utf8 $OPENSSL_DIR + cp -RfP $OPENSSL_DIR $OPENSSL_DIR_ORIG if [ ! -z $readonly ]; then find $OPENSSL_DIR_ORIG -type f -print0 | xargs -0 chmod a-w fi - tar -zxf $OPENSSL_SOURCE } function prune() { @@ -241,7 +621,7 @@ function applypatches () { done # Cleanup patch output - find . -type f -name "*.orig" -print0 | xargs -0 rm -f + find . \( -type f -o -type l \) -name "*.orig" -print0 | xargs -0 rm -f cd .. } @@ -250,12 +630,12 @@ function generatepatch() { declare -r patch=$1 # Cleanup stray files before generating patch - find $BOUNCYCASTLE_DIR -type f -name "*.orig" -print0 | xargs -0 rm -f - find $BOUNCYCASTLE_DIR -type f -name "*~" -print0 | xargs -0 rm -f + find $OPENSSL_DIR -type f -name "*.orig" -print0 | xargs -0 rm -f + find $OPENSSL_DIR -type f -name "*~" -print0 | xargs -0 rm -f + + # Find the files the patch touches and only keep those in the output patch + declare -r sources=`patch -p1 --dry-run -d $OPENSSL_DIR < $patch | awk '/^patching file / { print $3 }'` - declare -r variable_name=OPENSSL_PATCHES_`basename $patch .patch | sed s/-/_/`_SOURCES - # http://tldp.org/LDP/abs/html/ivr.html - eval declare -r sources=\$$variable_name rm -f $patch touch $patch for i in $sources; do diff --git a/app/openssl/include/openssl/aes.h b/app/openssl/include/openssl/aes.h index d2c99730..031abf01 100644 --- a/app/openssl/include/openssl/aes.h +++ b/app/openssl/include/openssl/aes.h @@ -90,6 +90,11 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits, int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + void AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key); void AES_decrypt(const unsigned char *in, unsigned char *out, diff --git a/app/openssl/include/openssl/asn1.h b/app/openssl/include/openssl/asn1.h index 59540e4e..220a0c8c 100644 --- a/app/openssl/include/openssl/asn1.h +++ b/app/openssl/include/openssl/asn1.h @@ -235,7 +235,7 @@ typedef struct asn1_object_st */ #define ASN1_STRING_FLAG_MSTRING 0x040 /* This is the base type that holds just about everything :-) */ -typedef struct asn1_string_st +struct asn1_string_st { int length; int type; @@ -245,7 +245,7 @@ typedef struct asn1_string_st * input data has a non-zero 'unused bits' value, it will be * handled correctly */ long flags; - } ASN1_STRING; + }; /* ASN1_ENCODING structure: this is used to save the received * encoding of an ASN1 type. This is useful to get round @@ -293,7 +293,6 @@ DECLARE_STACK_OF(ASN1_STRING_TABLE) * see asn1t.h */ typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE; -typedef struct ASN1_ITEM_st ASN1_ITEM; typedef struct ASN1_TLC_st ASN1_TLC; /* This is just an opaque pointer */ typedef struct ASN1_VALUE_st ASN1_VALUE; @@ -1194,6 +1193,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_F_ASN1_ITEM_I2D_FP 193 #define ASN1_F_ASN1_ITEM_PACK 198 #define ASN1_F_ASN1_ITEM_SIGN 195 +#define ASN1_F_ASN1_ITEM_SIGN_CTX 220 #define ASN1_F_ASN1_ITEM_UNPACK 199 #define ASN1_F_ASN1_ITEM_VERIFY 197 #define ASN1_F_ASN1_MBSTRING_NCOPY 122 @@ -1266,6 +1266,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_F_PKCS5_PBE2_SET_IV 167 #define ASN1_F_PKCS5_PBE_SET 202 #define ASN1_F_PKCS5_PBE_SET0_ALGOR 215 +#define ASN1_F_PKCS5_PBKDF2_SET 219 #define ASN1_F_SMIME_READ_ASN1 212 #define ASN1_F_SMIME_TEXT 213 #define ASN1_F_X509_CINF_NEW 168 @@ -1291,6 +1292,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_R_BOOLEAN_IS_WRONG_LENGTH 106 #define ASN1_R_BUFFER_TOO_SMALL 107 #define ASN1_R_CIPHER_HAS_NO_OBJECT_IDENTIFIER 108 +#define ASN1_R_CONTEXT_NOT_INITIALISED 217 #define ASN1_R_DATA_IS_WRONG 109 #define ASN1_R_DECODE_ERROR 110 #define ASN1_R_DECODING_ERROR 111 diff --git a/app/openssl/include/openssl/bio.h b/app/openssl/include/openssl/bio.h index 152802fb..05699ab2 100644 --- a/app/openssl/include/openssl/bio.h +++ b/app/openssl/include/openssl/bio.h @@ -68,6 +68,14 @@ #include +#ifndef OPENSSL_NO_SCTP +# ifndef OPENSSL_SYS_VMS +# include +# else +# include +# endif +#endif + #ifdef __cplusplus extern "C" { #endif @@ -95,6 +103,9 @@ extern "C" { #define BIO_TYPE_BIO (19|0x0400) /* (half a) BIO pair */ #define BIO_TYPE_LINEBUFFER (20|0x0200) /* filter */ #define BIO_TYPE_DGRAM (21|0x0400|0x0100) +#ifndef OPENSSL_NO_SCTP +#define BIO_TYPE_DGRAM_SCTP (24|0x0400|0x0100) +#endif #define BIO_TYPE_ASN1 (22|0x0200) /* filter */ #define BIO_TYPE_COMP (23|0x0200) /* filter */ @@ -146,6 +157,7 @@ extern "C" { /* #endif */ #define BIO_CTRL_DGRAM_QUERY_MTU 40 /* as kernel for current MTU */ +#define BIO_CTRL_DGRAM_GET_FALLBACK_MTU 47 #define BIO_CTRL_DGRAM_GET_MTU 41 /* get cached value for MTU */ #define BIO_CTRL_DGRAM_SET_MTU 42 /* set cached value for * MTU. want to use this @@ -161,7 +173,22 @@ extern "C" { #define BIO_CTRL_DGRAM_SET_PEER 44 /* Destination for the data */ #define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45 /* Next DTLS handshake timeout to - * adjust socket timeouts */ + * adjust socket timeouts */ + +#ifndef OPENSSL_NO_SCTP +/* SCTP stuff */ +#define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE 50 +#define BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY 51 +#define BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY 52 +#define BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD 53 +#define BIO_CTRL_DGRAM_SCTP_GET_SNDINFO 60 +#define BIO_CTRL_DGRAM_SCTP_SET_SNDINFO 61 +#define BIO_CTRL_DGRAM_SCTP_GET_RCVINFO 62 +#define BIO_CTRL_DGRAM_SCTP_SET_RCVINFO 63 +#define BIO_CTRL_DGRAM_SCTP_GET_PRINFO 64 +#define BIO_CTRL_DGRAM_SCTP_SET_PRINFO 65 +#define BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN 70 +#endif /* modifiers */ #define BIO_FP_READ 0x02 @@ -306,6 +333,15 @@ DECLARE_STACK_OF(BIO) typedef struct bio_f_buffer_ctx_struct { + /* Buffers are setup like this: + * + * <---------------------- size -----------------------> + * +---------------------------------------------------+ + * | consumed | remaining | free space | + * +---------------------------------------------------+ + * <-- off --><------- len -------> + */ + /* BIO *bio; */ /* this is now in the BIO struct */ int ibuf_size; /* how big is the input buffer */ int obuf_size; /* how big is the output buffer */ @@ -322,6 +358,34 @@ typedef struct bio_f_buffer_ctx_struct /* Prefix and suffix callback in ASN1 BIO */ typedef int asn1_ps_func(BIO *b, unsigned char **pbuf, int *plen, void *parg); +#ifndef OPENSSL_NO_SCTP +/* SCTP parameter structs */ +struct bio_dgram_sctp_sndinfo + { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; + }; + +struct bio_dgram_sctp_rcvinfo + { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; + }; + +struct bio_dgram_sctp_prinfo + { + uint16_t pr_policy; + uint32_t pr_value; + }; +#endif + /* connect BIO stuff */ #define BIO_CONN_S_BEFORE 1 #define BIO_CONN_S_GET_IP 2 @@ -619,6 +683,9 @@ BIO_METHOD *BIO_f_linebuffer(void); BIO_METHOD *BIO_f_nbio_test(void); #ifndef OPENSSL_NO_DGRAM BIO_METHOD *BIO_s_datagram(void); +#ifndef OPENSSL_NO_SCTP +BIO_METHOD *BIO_s_datagram_sctp(void); +#endif #endif /* BIO_METHOD *BIO_f_ber(void); */ @@ -661,6 +728,15 @@ int BIO_set_tcp_ndelay(int sock,int turn_on); BIO *BIO_new_socket(int sock, int close_flag); BIO *BIO_new_dgram(int fd, int close_flag); +#ifndef OPENSSL_NO_SCTP +BIO *BIO_new_dgram_sctp(int fd, int close_flag); +int BIO_dgram_is_sctp(BIO *bio); +int BIO_dgram_sctp_notification_cb(BIO *b, + void (*handle_notifications)(BIO *bio, void *context, void *buf), + void *context); +int BIO_dgram_sctp_wait_for_dry(BIO *b); +int BIO_dgram_sctp_msg_waiting(BIO *b); +#endif BIO *BIO_new_fd(int fd, int close_flag); BIO *BIO_new_connect(char *host_port); BIO *BIO_new_accept(char *host_port); @@ -725,6 +801,7 @@ void ERR_load_BIO_strings(void); #define BIO_F_BUFFER_CTRL 114 #define BIO_F_CONN_CTRL 127 #define BIO_F_CONN_STATE 115 +#define BIO_F_DGRAM_SCTP_READ 132 #define BIO_F_FILE_CTRL 116 #define BIO_F_FILE_READ 130 #define BIO_F_LINEBUFFER_CTRL 129 diff --git a/app/openssl/include/openssl/blowfish.h b/app/openssl/include/openssl/blowfish.h index b97e76f9..4b6c8920 100644 --- a/app/openssl/include/openssl/blowfish.h +++ b/app/openssl/include/openssl/blowfish.h @@ -104,7 +104,9 @@ typedef struct bf_key_st BF_LONG S[4*256]; } BF_KEY; - +#ifdef OPENSSL_FIPS +void private_BF_set_key(BF_KEY *key, int len, const unsigned char *data); +#endif void BF_set_key(BF_KEY *key, int len, const unsigned char *data); void BF_encrypt(BF_LONG *data,const BF_KEY *key); diff --git a/app/openssl/include/openssl/bn.h b/app/openssl/include/openssl/bn.h index a0bc4783..e776c07a 100644 --- a/app/openssl/include/openssl/bn.h +++ b/app/openssl/include/openssl/bn.h @@ -538,6 +538,8 @@ BIGNUM *BN_mod_inverse(BIGNUM *ret, BIGNUM *BN_mod_sqrt(BIGNUM *ret, const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); +void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords); + /* Deprecated versions */ #ifndef OPENSSL_NO_DEPRECATED BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe, @@ -558,6 +560,17 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, int do_trial_division, BN_GENCB *cb); +int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); + +int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, + const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, + const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); +int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, + BIGNUM *Xp1, BIGNUM *Xp2, + const BIGNUM *Xp, + const BIGNUM *e, BN_CTX *ctx, + BN_GENCB *cb); + BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, @@ -612,6 +625,8 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_RECP_CTX *recp, BN_CTX *ctx); +#ifndef OPENSSL_NO_EC2M + /* Functions for arithmetic over binary polynomials represented by BIGNUMs. * * The BIGNUM::neg property of BIGNUMs representing binary polynomials is @@ -663,6 +678,8 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); int BN_GF2m_arr2poly(const int p[], BIGNUM *a); +#endif + /* faster mod functions for the 'NIST primes' * 0 <= a < p^2 */ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); @@ -677,6 +694,10 @@ const BIGNUM *BN_get0_nist_prime_256(void); const BIGNUM *BN_get0_nist_prime_384(void); const BIGNUM *BN_get0_nist_prime_521(void); +int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv, + const unsigned char *message, size_t message_len, + BN_CTX *ctx); + /* library internal functions */ #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ @@ -759,11 +780,20 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); #define bn_fix_top(a) bn_check_top(a) +#define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) +#define bn_wcheck_size(bn, words) \ + do { \ + const BIGNUM *_bnum2 = (bn); \ + assert(words <= (_bnum2)->dmax && words >= (_bnum2)->top); \ + } while(0) + #else /* !BN_DEBUG */ #define bn_pollute(a) #define bn_check_top(a) #define bn_fix_top(a) bn_correct_top(a) +#define bn_check_size(bn, bits) +#define bn_wcheck_size(bn, words) #endif @@ -827,6 +857,7 @@ void ERR_load_BN_strings(void); #define BN_F_BN_EXP 123 #define BN_F_BN_EXPAND2 108 #define BN_F_BN_EXPAND_INTERNAL 120 +#define BN_F_BN_GENERATE_DSA_NONCE 140 #define BN_F_BN_GF2M_MOD 131 #define BN_F_BN_GF2M_MOD_EXP 132 #define BN_F_BN_GF2M_MOD_MUL 133 @@ -866,6 +897,7 @@ void ERR_load_BN_strings(void); #define BN_R_NOT_INITIALIZED 107 #define BN_R_NO_INVERSE 108 #define BN_R_NO_SOLUTION 116 +#define BN_R_PRIVATE_KEY_TOO_LARGE 117 #define BN_R_P_IS_NOT_PRIME 112 #define BN_R_TOO_MANY_ITERATIONS 113 #define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 diff --git a/app/openssl/include/openssl/buffer.h b/app/openssl/include/openssl/buffer.h index 178e4182..f8da32b4 100644 --- a/app/openssl/include/openssl/buffer.h +++ b/app/openssl/include/openssl/buffer.h @@ -88,7 +88,7 @@ int BUF_MEM_grow_clean(BUF_MEM *str, size_t len); char * BUF_strdup(const char *str); char * BUF_strndup(const char *str, size_t siz); void * BUF_memdup(const void *data, size_t siz); -void BUF_reverse(unsigned char *out, unsigned char *in, size_t siz); +void BUF_reverse(unsigned char *out, const unsigned char *in, size_t siz); /* safe string functions */ size_t BUF_strlcpy(char *dst,const char *src,size_t siz); diff --git a/app/openssl/include/openssl/cmac.h b/app/openssl/include/openssl/cmac.h new file mode 100644 index 00000000..712e92dc --- /dev/null +++ b/app/openssl/include/openssl/cmac.h @@ -0,0 +1,82 @@ +/* crypto/cmac/cmac.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + + +#ifndef HEADER_CMAC_H +#define HEADER_CMAC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Opaque */ +typedef struct CMAC_CTX_st CMAC_CTX; + +CMAC_CTX *CMAC_CTX_new(void); +void CMAC_CTX_cleanup(CMAC_CTX *ctx); +void CMAC_CTX_free(CMAC_CTX *ctx); +EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx); +int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in); + +int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, + const EVP_CIPHER *cipher, ENGINE *impl); +int CMAC_Update(CMAC_CTX *ctx, const void *data, size_t dlen); +int CMAC_Final(CMAC_CTX *ctx, unsigned char *out, size_t *poutlen); +int CMAC_resume(CMAC_CTX *ctx); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/app/openssl/include/openssl/cms.h b/app/openssl/include/openssl/cms.h new file mode 100644 index 00000000..36994fa6 --- /dev/null +++ b/app/openssl/include/openssl/cms.h @@ -0,0 +1,501 @@ +/* crypto/cms/cms.h */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + + +#ifndef HEADER_CMS_H +#define HEADER_CMS_H + +#include + +#ifdef OPENSSL_NO_CMS +#error CMS is disabled. +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct CMS_ContentInfo_st CMS_ContentInfo; +typedef struct CMS_SignerInfo_st CMS_SignerInfo; +typedef struct CMS_CertificateChoices CMS_CertificateChoices; +typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice; +typedef struct CMS_RecipientInfo_st CMS_RecipientInfo; +typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest; +typedef struct CMS_Receipt_st CMS_Receipt; + +DECLARE_STACK_OF(CMS_SignerInfo) +DECLARE_STACK_OF(GENERAL_NAMES) +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) +DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +#define CMS_SIGNERINFO_ISSUER_SERIAL 0 +#define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +#define CMS_RECIPINFO_TRANS 0 +#define CMS_RECIPINFO_AGREE 1 +#define CMS_RECIPINFO_KEK 2 +#define CMS_RECIPINFO_PASS 3 +#define CMS_RECIPINFO_OTHER 4 + +/* S/MIME related flags */ + +#define CMS_TEXT 0x1 +#define CMS_NOCERTS 0x2 +#define CMS_NO_CONTENT_VERIFY 0x4 +#define CMS_NO_ATTR_VERIFY 0x8 +#define CMS_NOSIGS \ + (CMS_NO_CONTENT_VERIFY|CMS_NO_ATTR_VERIFY) +#define CMS_NOINTERN 0x10 +#define CMS_NO_SIGNER_CERT_VERIFY 0x20 +#define CMS_NOVERIFY 0x20 +#define CMS_DETACHED 0x40 +#define CMS_BINARY 0x80 +#define CMS_NOATTR 0x100 +#define CMS_NOSMIMECAP 0x200 +#define CMS_NOOLDMIMETYPE 0x400 +#define CMS_CRLFEOL 0x800 +#define CMS_STREAM 0x1000 +#define CMS_NOCRL 0x2000 +#define CMS_PARTIAL 0x4000 +#define CMS_REUSE_DIGEST 0x8000 +#define CMS_USE_KEYID 0x10000 +#define CMS_DEBUG_DECRYPT 0x20000 + +const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms); + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont); +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *bio); + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms); +int CMS_is_detached(CMS_ContentInfo *cms); +int CMS_set_detached(CMS_ContentInfo *cms, int detached); + +#ifdef HEADER_PEM_H +DECLARE_PEM_rw_const(CMS, CMS_ContentInfo) +#endif + +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms); +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms); +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms); + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms); +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont); +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags); + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, unsigned int flags); + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, unsigned int flags); + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, + unsigned int flags); + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags); + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags); + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags); + +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, size_t keylen, + unsigned int flags); + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen); + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags); + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags); + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms); + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags); + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pkey, X509 *cert, + BIO *dcont, BIO *out, + unsigned int flags); + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert); +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen); +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen); + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags); +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey); +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert); +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg); +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType); + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype); + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen); + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen); + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, + ossl_ssize_t passlen); + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph); + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags); + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid); +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms); + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo *cms); +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert); +int CMS_add1_cert(CMS_ContentInfo *cms, X509 *cert); +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms); + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms); +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl); +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl); +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms); + +int CMS_SignedData_init(CMS_ContentInfo *cms); +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags); +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + unsigned int flags); +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, X509 **signer, + X509_ALGOR **pdig, X509_ALGOR **psig); +int CMS_SignerInfo_sign(CMS_SignerInfo *si); +int CMS_SignerInfo_verify(CMS_SignerInfo *si); +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs); +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize); +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap); + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si); +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_signed_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si); +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +#ifdef HEADER_X509V3_H + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0(unsigned char *id, int idlen, + int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo); +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr); +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto); + +#endif + +/* BEGIN ERROR CODES */ +/* The following lines are auto generated by the script mkerr.pl. Any changes + * made after this point may be overwritten when the script is next run. + */ +void ERR_load_CMS_strings(void); + +/* Error codes for the CMS functions. */ + +/* Function codes. */ +#define CMS_F_CHECK_CONTENT 99 +#define CMS_F_CMS_ADD0_CERT 164 +#define CMS_F_CMS_ADD0_RECIPIENT_KEY 100 +#define CMS_F_CMS_ADD0_RECIPIENT_PASSWORD 165 +#define CMS_F_CMS_ADD1_RECEIPTREQUEST 158 +#define CMS_F_CMS_ADD1_RECIPIENT_CERT 101 +#define CMS_F_CMS_ADD1_SIGNER 102 +#define CMS_F_CMS_ADD1_SIGNINGTIME 103 +#define CMS_F_CMS_COMPRESS 104 +#define CMS_F_CMS_COMPRESSEDDATA_CREATE 105 +#define CMS_F_CMS_COMPRESSEDDATA_INIT_BIO 106 +#define CMS_F_CMS_COPY_CONTENT 107 +#define CMS_F_CMS_COPY_MESSAGEDIGEST 108 +#define CMS_F_CMS_DATA 109 +#define CMS_F_CMS_DATAFINAL 110 +#define CMS_F_CMS_DATAINIT 111 +#define CMS_F_CMS_DECRYPT 112 +#define CMS_F_CMS_DECRYPT_SET1_KEY 113 +#define CMS_F_CMS_DECRYPT_SET1_PASSWORD 166 +#define CMS_F_CMS_DECRYPT_SET1_PKEY 114 +#define CMS_F_CMS_DIGESTALGORITHM_FIND_CTX 115 +#define CMS_F_CMS_DIGESTALGORITHM_INIT_BIO 116 +#define CMS_F_CMS_DIGESTEDDATA_DO_FINAL 117 +#define CMS_F_CMS_DIGEST_VERIFY 118 +#define CMS_F_CMS_ENCODE_RECEIPT 161 +#define CMS_F_CMS_ENCRYPT 119 +#define CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO 120 +#define CMS_F_CMS_ENCRYPTEDDATA_DECRYPT 121 +#define CMS_F_CMS_ENCRYPTEDDATA_ENCRYPT 122 +#define CMS_F_CMS_ENCRYPTEDDATA_SET1_KEY 123 +#define CMS_F_CMS_ENVELOPEDDATA_CREATE 124 +#define CMS_F_CMS_ENVELOPEDDATA_INIT_BIO 125 +#define CMS_F_CMS_ENVELOPED_DATA_INIT 126 +#define CMS_F_CMS_FINAL 127 +#define CMS_F_CMS_GET0_CERTIFICATE_CHOICES 128 +#define CMS_F_CMS_GET0_CONTENT 129 +#define CMS_F_CMS_GET0_ECONTENT_TYPE 130 +#define CMS_F_CMS_GET0_ENVELOPED 131 +#define CMS_F_CMS_GET0_REVOCATION_CHOICES 132 +#define CMS_F_CMS_GET0_SIGNED 133 +#define CMS_F_CMS_MSGSIGDIGEST_ADD1 162 +#define CMS_F_CMS_RECEIPTREQUEST_CREATE0 159 +#define CMS_F_CMS_RECEIPT_VERIFY 160 +#define CMS_F_CMS_RECIPIENTINFO_DECRYPT 134 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT 135 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT 136 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID 137 +#define CMS_F_CMS_RECIPIENTINFO_KEKRI_ID_CMP 138 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_CERT_CMP 139 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT 140 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT 141 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS 142 +#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID 143 +#define CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT 167 +#define CMS_F_CMS_RECIPIENTINFO_SET0_KEY 144 +#define CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD 168 +#define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY 145 +#define CMS_F_CMS_SET1_SIGNERIDENTIFIER 146 +#define CMS_F_CMS_SET_DETACHED 147 +#define CMS_F_CMS_SIGN 148 +#define CMS_F_CMS_SIGNED_DATA_INIT 149 +#define CMS_F_CMS_SIGNERINFO_CONTENT_SIGN 150 +#define CMS_F_CMS_SIGNERINFO_SIGN 151 +#define CMS_F_CMS_SIGNERINFO_VERIFY 152 +#define CMS_F_CMS_SIGNERINFO_VERIFY_CERT 153 +#define CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT 154 +#define CMS_F_CMS_SIGN_RECEIPT 163 +#define CMS_F_CMS_STREAM 155 +#define CMS_F_CMS_UNCOMPRESS 156 +#define CMS_F_CMS_VERIFY 157 + +/* Reason codes. */ +#define CMS_R_ADD_SIGNER_ERROR 99 +#define CMS_R_CERTIFICATE_ALREADY_PRESENT 175 +#define CMS_R_CERTIFICATE_HAS_NO_KEYID 160 +#define CMS_R_CERTIFICATE_VERIFY_ERROR 100 +#define CMS_R_CIPHER_INITIALISATION_ERROR 101 +#define CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR 102 +#define CMS_R_CMS_DATAFINAL_ERROR 103 +#define CMS_R_CMS_LIB 104 +#define CMS_R_CONTENTIDENTIFIER_MISMATCH 170 +#define CMS_R_CONTENT_NOT_FOUND 105 +#define CMS_R_CONTENT_TYPE_MISMATCH 171 +#define CMS_R_CONTENT_TYPE_NOT_COMPRESSED_DATA 106 +#define CMS_R_CONTENT_TYPE_NOT_ENVELOPED_DATA 107 +#define CMS_R_CONTENT_TYPE_NOT_SIGNED_DATA 108 +#define CMS_R_CONTENT_VERIFY_ERROR 109 +#define CMS_R_CTRL_ERROR 110 +#define CMS_R_CTRL_FAILURE 111 +#define CMS_R_DECRYPT_ERROR 112 +#define CMS_R_DIGEST_ERROR 161 +#define CMS_R_ERROR_GETTING_PUBLIC_KEY 113 +#define CMS_R_ERROR_READING_MESSAGEDIGEST_ATTRIBUTE 114 +#define CMS_R_ERROR_SETTING_KEY 115 +#define CMS_R_ERROR_SETTING_RECIPIENTINFO 116 +#define CMS_R_INVALID_ENCRYPTED_KEY_LENGTH 117 +#define CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER 176 +#define CMS_R_INVALID_KEY_LENGTH 118 +#define CMS_R_MD_BIO_INIT_ERROR 119 +#define CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH 120 +#define CMS_R_MESSAGEDIGEST_WRONG_LENGTH 121 +#define CMS_R_MSGSIGDIGEST_ERROR 172 +#define CMS_R_MSGSIGDIGEST_VERIFICATION_FAILURE 162 +#define CMS_R_MSGSIGDIGEST_WRONG_LENGTH 163 +#define CMS_R_NEED_ONE_SIGNER 164 +#define CMS_R_NOT_A_SIGNED_RECEIPT 165 +#define CMS_R_NOT_ENCRYPTED_DATA 122 +#define CMS_R_NOT_KEK 123 +#define CMS_R_NOT_KEY_TRANSPORT 124 +#define CMS_R_NOT_PWRI 177 +#define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE 125 +#define CMS_R_NO_CIPHER 126 +#define CMS_R_NO_CONTENT 127 +#define CMS_R_NO_CONTENT_TYPE 173 +#define CMS_R_NO_DEFAULT_DIGEST 128 +#define CMS_R_NO_DIGEST_SET 129 +#define CMS_R_NO_KEY 130 +#define CMS_R_NO_KEY_OR_CERT 174 +#define CMS_R_NO_MATCHING_DIGEST 131 +#define CMS_R_NO_MATCHING_RECIPIENT 132 +#define CMS_R_NO_MATCHING_SIGNATURE 166 +#define CMS_R_NO_MSGSIGDIGEST 167 +#define CMS_R_NO_PASSWORD 178 +#define CMS_R_NO_PRIVATE_KEY 133 +#define CMS_R_NO_PUBLIC_KEY 134 +#define CMS_R_NO_RECEIPT_REQUEST 168 +#define CMS_R_NO_SIGNERS 135 +#define CMS_R_PRIVATE_KEY_DOES_NOT_MATCH_CERTIFICATE 136 +#define CMS_R_RECEIPT_DECODE_ERROR 169 +#define CMS_R_RECIPIENT_ERROR 137 +#define CMS_R_SIGNER_CERTIFICATE_NOT_FOUND 138 +#define CMS_R_SIGNFINAL_ERROR 139 +#define CMS_R_SMIME_TEXT_ERROR 140 +#define CMS_R_STORE_INIT_ERROR 141 +#define CMS_R_TYPE_NOT_COMPRESSED_DATA 142 +#define CMS_R_TYPE_NOT_DATA 143 +#define CMS_R_TYPE_NOT_DIGESTED_DATA 144 +#define CMS_R_TYPE_NOT_ENCRYPTED_DATA 145 +#define CMS_R_TYPE_NOT_ENVELOPED_DATA 146 +#define CMS_R_UNABLE_TO_FINALIZE_CONTEXT 147 +#define CMS_R_UNKNOWN_CIPHER 148 +#define CMS_R_UNKNOWN_DIGEST_ALGORIHM 149 +#define CMS_R_UNKNOWN_ID 150 +#define CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM 151 +#define CMS_R_UNSUPPORTED_CONTENT_TYPE 152 +#define CMS_R_UNSUPPORTED_KEK_ALGORITHM 153 +#define CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM 179 +#define CMS_R_UNSUPPORTED_RECIPIENT_TYPE 154 +#define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE 155 +#define CMS_R_UNSUPPORTED_TYPE 156 +#define CMS_R_UNWRAP_ERROR 157 +#define CMS_R_UNWRAP_FAILURE 180 +#define CMS_R_VERIFICATION_FAILURE 158 +#define CMS_R_WRAP_ERROR 159 + +#ifdef __cplusplus +} +#endif +#endif diff --git a/app/openssl/include/openssl/crypto.h b/app/openssl/include/openssl/crypto.h index b0360cec..f92fc518 100644 --- a/app/openssl/include/openssl/crypto.h +++ b/app/openssl/include/openssl/crypto.h @@ -488,10 +488,10 @@ void CRYPTO_get_mem_debug_functions(void (**m)(void *,int,const char *,int,int), long (**go)(void)); void *CRYPTO_malloc_locked(int num, const char *file, int line); -void CRYPTO_free_locked(void *); +void CRYPTO_free_locked(void *ptr); void *CRYPTO_malloc(int num, const char *file, int line); char *CRYPTO_strdup(const char *str, const char *file, int line); -void CRYPTO_free(void *); +void CRYPTO_free(void *ptr); void *CRYPTO_realloc(void *addr,int num, const char *file, int line); void *CRYPTO_realloc_clean(void *addr,int old_num,int num,const char *file, int line); @@ -547,6 +547,40 @@ unsigned long *OPENSSL_ia32cap_loc(void); #define OPENSSL_ia32cap (*(OPENSSL_ia32cap_loc())) int OPENSSL_isservice(void); +int FIPS_mode(void); +int FIPS_mode_set(int r); + +void OPENSSL_init(void); + +#define fips_md_init(alg) fips_md_init_ctx(alg, alg) + +#ifdef OPENSSL_FIPS +#define fips_md_init_ctx(alg, cx) \ + int alg##_Init(cx##_CTX *c) \ + { \ + if (FIPS_mode()) OpenSSLDie(__FILE__, __LINE__, \ + "Low level API call to digest " #alg " forbidden in FIPS mode!"); \ + return private_##alg##_Init(c); \ + } \ + int private_##alg##_Init(cx##_CTX *c) + +#define fips_cipher_abort(alg) \ + if (FIPS_mode()) OpenSSLDie(__FILE__, __LINE__, \ + "Low level API call to cipher " #alg " forbidden in FIPS mode!") + +#else +#define fips_md_init_ctx(alg, cx) \ + int alg##_Init(cx##_CTX *c) +#define fips_cipher_abort(alg) while(0) +#endif + +/* CRYPTO_memcmp returns zero iff the |len| bytes at |a| and |b| are equal. It + * takes an amount of time dependent on |len|, but independent of the contents + * of |a| and |b|. Unlike memcmp, it cannot be used to put elements into a + * defined order as the return value when a != b is undefined, other than to be + * non-zero. */ +int CRYPTO_memcmp(const void *a, const void *b, size_t len); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -562,11 +596,13 @@ void ERR_load_CRYPTO_strings(void); #define CRYPTO_F_CRYPTO_SET_EX_DATA 102 #define CRYPTO_F_DEF_ADD_INDEX 104 #define CRYPTO_F_DEF_GET_CLASS 105 +#define CRYPTO_F_FIPS_MODE_SET 109 #define CRYPTO_F_INT_DUP_EX_DATA 106 #define CRYPTO_F_INT_FREE_EX_DATA 107 #define CRYPTO_F_INT_NEW_EX_DATA 108 /* Reason codes. */ +#define CRYPTO_R_FIPS_MODE_NOT_SUPPORTED 101 #define CRYPTO_R_NO_DYNLOCK_CREATE_CALLBACK 100 #ifdef __cplusplus diff --git a/app/openssl/include/openssl/des.h b/app/openssl/include/openssl/des.h index 92b66635..1eaedcbd 100644 --- a/app/openssl/include/openssl/des.h +++ b/app/openssl/include/openssl/des.h @@ -224,6 +224,9 @@ int DES_set_key(const_DES_cblock *key,DES_key_schedule *schedule); int DES_key_sched(const_DES_cblock *key,DES_key_schedule *schedule); int DES_set_key_checked(const_DES_cblock *key,DES_key_schedule *schedule); void DES_set_key_unchecked(const_DES_cblock *key,DES_key_schedule *schedule); +#ifdef OPENSSL_FIPS +void private_DES_set_key_unchecked(const_DES_cblock *key,DES_key_schedule *schedule); +#endif void DES_string_to_key(const char *str,DES_cblock *key); void DES_string_to_2keys(const char *str,DES_cblock *key1,DES_cblock *key2); void DES_cfb64_encrypt(const unsigned char *in,unsigned char *out,long length, diff --git a/app/openssl/include/openssl/dh.h b/app/openssl/include/openssl/dh.h index 849309a4..ea59e610 100644 --- a/app/openssl/include/openssl/dh.h +++ b/app/openssl/include/openssl/dh.h @@ -86,6 +86,21 @@ * be used for all exponents. */ +/* If this flag is set the DH method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its reposibility + * to ensure the result is compliant. + */ + +#define DH_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define DH_FLAG_NON_FIPS_ALLOW 0x0400 + #ifdef __cplusplus extern "C" { #endif @@ -230,6 +245,9 @@ void ERR_load_DH_strings(void); #define DH_F_COMPUTE_KEY 102 #define DH_F_DHPARAMS_PRINT_FP 101 #define DH_F_DH_BUILTIN_GENPARAMS 106 +#define DH_F_DH_COMPUTE_KEY 114 +#define DH_F_DH_GENERATE_KEY 115 +#define DH_F_DH_GENERATE_PARAMETERS_EX 116 #define DH_F_DH_NEW_METHOD 105 #define DH_F_DH_PARAM_DECODE 107 #define DH_F_DH_PRIV_DECODE 110 @@ -249,7 +267,9 @@ void ERR_load_DH_strings(void); #define DH_R_DECODE_ERROR 104 #define DH_R_INVALID_PUBKEY 102 #define DH_R_KEYS_NOT_SET 108 +#define DH_R_KEY_SIZE_TOO_SMALL 110 #define DH_R_MODULUS_TOO_LARGE 103 +#define DH_R_NON_FIPS_METHOD 111 #define DH_R_NO_PARAMETERS_SET 107 #define DH_R_NO_PRIVATE_VALUE 100 #define DH_R_PARAMETER_ENCODING_ERROR 105 diff --git a/app/openssl/include/openssl/dsa.h b/app/openssl/include/openssl/dsa.h index ac50a5c8..7531c653 100644 --- a/app/openssl/include/openssl/dsa.h +++ b/app/openssl/include/openssl/dsa.h @@ -96,6 +96,25 @@ * faster variable sliding window method to * be used for all exponents. */ +#define DSA_FLAG_NONCE_FROM_HASH 0x04 /* Causes the DSA nonce to be calculated + from SHA512(private_key + H(message) + + random). This strengthens DSA against a + weak PRNG. */ + +/* If this flag is set the DSA method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its reposibility + * to ensure the result is compliant. + */ + +#define DSA_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define DSA_FLAG_NON_FIPS_ALLOW 0x0400 #ifdef __cplusplus extern "C" { @@ -115,8 +134,9 @@ struct dsa_method { const char *name; DSA_SIG * (*dsa_do_sign)(const unsigned char *dgst, int dlen, DSA *dsa); - int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, - BIGNUM **rp); + int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, + BIGNUM **kinvp, BIGNUM **rp, + const unsigned char *dgst, int dlen); int (*dsa_do_verify)(const unsigned char *dgst, int dgst_len, DSA_SIG *sig, DSA *dsa); int (*dsa_mod_exp)(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, @@ -272,6 +292,8 @@ void ERR_load_DSA_strings(void); #define DSA_F_DSAPARAMS_PRINT_FP 101 #define DSA_F_DSA_DO_SIGN 112 #define DSA_F_DSA_DO_VERIFY 113 +#define DSA_F_DSA_GENERATE_KEY 124 +#define DSA_F_DSA_GENERATE_PARAMETERS_EX 123 #define DSA_F_DSA_NEW_METHOD 103 #define DSA_F_DSA_PARAM_DECODE 119 #define DSA_F_DSA_PRINT_FP 105 @@ -282,6 +304,7 @@ void ERR_load_DSA_strings(void); #define DSA_F_DSA_SIGN 106 #define DSA_F_DSA_SIGN_SETUP 107 #define DSA_F_DSA_SIG_NEW 109 +#define DSA_F_DSA_SIG_PRINT 125 #define DSA_F_DSA_VERIFY 108 #define DSA_F_I2D_DSA_SIG 111 #define DSA_F_OLD_DSA_PRIV_DECODE 122 @@ -298,6 +321,9 @@ void ERR_load_DSA_strings(void); #define DSA_R_INVALID_DIGEST_TYPE 106 #define DSA_R_MISSING_PARAMETERS 101 #define DSA_R_MODULUS_TOO_LARGE 103 +#define DSA_R_NEED_NEW_SETUP_VALUES 110 +#define DSA_R_NONCE_CANNOT_BE_PRECOMPUTED 112 +#define DSA_R_NON_FIPS_DSA_METHOD 111 #define DSA_R_NO_PARAMETERS_SET 107 #define DSA_R_PARAMETER_ENCODING_ERROR 105 diff --git a/app/openssl/include/openssl/dtls1.h b/app/openssl/include/openssl/dtls1.h index 2900d1d8..e65d5011 100644 --- a/app/openssl/include/openssl/dtls1.h +++ b/app/openssl/include/openssl/dtls1.h @@ -57,8 +57,8 @@ * */ -#ifndef HEADER_DTLS1_H -#define HEADER_DTLS1_H +#ifndef HEADER_DTLS1_H +#define HEADER_DTLS1_H #include #include @@ -72,8 +72,12 @@ #elif defined(OPENSSL_SYS_NETWARE) && !defined(_WINSOCK2API_) #include #else +#if defined(OPENSSL_SYS_VXWORKS) +#include +#else #include #endif +#endif #ifdef __cplusplus extern "C" { @@ -105,6 +109,11 @@ extern "C" { #define DTLS1_AL_HEADER_LENGTH 2 #endif +#ifndef OPENSSL_NO_SSL_INTERN + +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_AUTH_LABEL "EXPORTER_DTLS_OVER_SCTP" +#endif typedef struct dtls1_bitmap_st { @@ -227,7 +236,7 @@ typedef struct dtls1_state_st struct dtls1_timeout_st timeout; - /* Indicates when the last handshake msg sent will timeout */ + /* Indicates when the last handshake msg or heartbeat sent will timeout */ struct timeval next_timeout; /* Timeout duration */ @@ -243,6 +252,13 @@ typedef struct dtls1_state_st unsigned int retransmitting; unsigned int change_cipher_spec_ok; +#ifndef OPENSSL_NO_SCTP + /* used when SSL_ST_XX_FLUSH is entered */ + int next_state; + + int shutdown_received; +#endif + } DTLS1_STATE; typedef struct dtls1_record_data_st @@ -251,8 +267,12 @@ typedef struct dtls1_record_data_st unsigned int packet_length; SSL3_BUFFER rbuf; SSL3_RECORD rrec; +#ifndef OPENSSL_NO_SCTP + struct bio_dgram_sctp_rcvinfo recordinfo; +#endif } DTLS1_RECORD_DATA; +#endif /* Timeout multipliers (timeout slice is defined in apps/timeouts.h */ #define DTLS1_TMO_READ_COUNT 2 diff --git a/app/openssl/include/openssl/e_os2.h b/app/openssl/include/openssl/e_os2.h index 4c785c62..d22c0368 100644 --- a/app/openssl/include/openssl/e_os2.h +++ b/app/openssl/include/openssl/e_os2.h @@ -193,8 +193,14 @@ extern "C" { #endif /* --------------------------------- VOS ----------------------------------- */ -#ifdef OPENSSL_SYSNAME_VOS +#if defined(__VOS__) || defined(OPENSSL_SYSNAME_VOS) # define OPENSSL_SYS_VOS +#ifdef __HPPA__ +# define OPENSSL_SYS_VOS_HPPA +#endif +#ifdef __IA32__ +# define OPENSSL_SYS_VOS_IA32 +#endif #endif /* ------------------------------- VxWorks --------------------------------- */ @@ -283,6 +289,26 @@ extern "C" { # define OPENSSL_GLOBAL_REF(name) _shadow_##name #endif +#if defined(OPENSSL_SYS_MACINTOSH_CLASSIC) && macintosh==1 && !defined(MAC_OS_GUSI_SOURCE) +# define ossl_ssize_t long +#endif + +#ifdef OPENSSL_SYS_MSDOS +# define ossl_ssize_t long +#endif + +#if defined(NeXT) || defined(OPENSSL_SYS_NEWS4) || defined(OPENSSL_SYS_SUNOS) +# define ssize_t int +#endif + +#if defined(__ultrix) && !defined(ssize_t) +# define ossl_ssize_t int +#endif + +#ifndef ossl_ssize_t +# define ossl_ssize_t ssize_t +#endif + #ifdef __cplusplus } #endif diff --git a/app/openssl/include/openssl/ec.h b/app/openssl/include/openssl/ec.h index ee707813..d008a0da 100644 --- a/app/openssl/include/openssl/ec.h +++ b/app/openssl/include/openssl/ec.h @@ -151,7 +151,24 @@ const EC_METHOD *EC_GFp_mont_method(void); */ const EC_METHOD *EC_GFp_nist_method(void); +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +/** Returns 64-bit optimized methods for nistp224 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp224_method(void); + +/** Returns 64-bit optimized methods for nistp256 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp256_method(void); + +/** Returns 64-bit optimized methods for nistp521 + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistp521_method(void); +#endif +#ifndef OPENSSL_NO_EC2M /********************************************************************/ /* EC_METHOD for curves over GF(2^m) */ /********************************************************************/ @@ -161,6 +178,8 @@ const EC_METHOD *EC_GFp_nist_method(void); */ const EC_METHOD *EC_GF2m_simple_method(void); +#endif + /********************************************************************/ /* EC_GROUP functions */ @@ -255,10 +274,10 @@ int EC_GROUP_get_curve_name(const EC_GROUP *group); void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag); int EC_GROUP_get_asn1_flag(const EC_GROUP *group); -void EC_GROUP_set_point_conversion_form(EC_GROUP *, point_conversion_form_t); +void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form); point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *); -unsigned char *EC_GROUP_get0_seed(const EC_GROUP *); +unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x); size_t EC_GROUP_get_seed_len(const EC_GROUP *); size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len); @@ -282,6 +301,7 @@ int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, co */ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); +#ifndef OPENSSL_NO_EC2M /** Sets the parameter of a ec over GF2m defined by y^2 + x*y = x^3 + a*x^2 + b * \param group EC_GROUP object * \param p BIGNUM with the polynomial defining the underlying field @@ -301,7 +321,7 @@ int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, c * \return 1 on success and 0 if an error occured */ int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); - +#endif /** Returns the number of bits needed to represent a field element * \param group EC_GROUP object * \return number of bits needed to represent a field element @@ -342,7 +362,7 @@ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx); * \return newly created EC_GROUP object with the specified parameters */ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); - +#ifndef OPENSSL_NO_EC2M /** Creates a new EC_GROUP object with the specified parameters defined * over GF2m (defined by the equation y^2 + x*y = x^3 + a*x^2 + b) * \param p BIGNUM with the polynomial defining the underlying field @@ -352,7 +372,7 @@ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM * \return newly created EC_GROUP object with the specified parameters */ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); - +#endif /** Creates a EC_GROUP object with a curve specified by a NID * \param nid NID of the OID of the curve name * \return newly created EC_GROUP object with specified curve or NULL @@ -481,7 +501,7 @@ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, */ int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, const BIGNUM *x, int y_bit, BN_CTX *ctx); - +#ifndef OPENSSL_NO_EC2M /** Sets the affine coordinates of a EC_POINT over GF2m * \param group underlying EC_GROUP object * \param p EC_POINT object @@ -514,7 +534,7 @@ int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, */ int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, const BIGNUM *x, int y_bit, BN_CTX *ctx); - +#endif /** Encodes a EC_POINT object to a octet string * \param group underlying EC_GROUP object * \param p EC_POINT object @@ -606,8 +626,8 @@ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *c */ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); -int EC_POINT_make_affine(const EC_GROUP *, EC_POINT *, BN_CTX *); -int EC_POINTs_make_affine(const EC_GROUP *, size_t num, EC_POINT *[], BN_CTX *); +int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx); +int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx); /** Computes r = generator * n sum_{i=0}^num p[i] * m[i] * \param group underlying EC_GROUP object @@ -653,9 +673,11 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group); /* EC_GROUP_get_basis_type() returns the NID of the basis type * used to represent the field elements */ int EC_GROUP_get_basis_type(const EC_GROUP *); +#ifndef OPENSSL_NO_EC2M int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, unsigned int *k2, unsigned int *k3); +#endif #define OPENSSL_EC_NAMED_CURVE 0x001 @@ -689,11 +711,21 @@ typedef struct ec_key_st EC_KEY; #define EC_PKEY_NO_PARAMETERS 0x001 #define EC_PKEY_NO_PUBKEY 0x002 +/* some values for the flags field */ +#define EC_FLAG_NON_FIPS_ALLOW 0x1 +#define EC_FLAG_FIPS_CHECKED 0x2 + /** Creates a new EC_KEY object. * \return EC_KEY object or NULL if an error occurred. */ EC_KEY *EC_KEY_new(void); +int EC_KEY_get_flags(const EC_KEY *key); + +void EC_KEY_set_flags(EC_KEY *key, int flags); + +void EC_KEY_clear_flags(EC_KEY *key, int flags); + /** Creates a new EC_KEY object using a named curve as underlying * EC_GROUP object. * \param nid NID of the named curve. @@ -768,16 +800,35 @@ const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key); int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub); unsigned EC_KEY_get_enc_flags(const EC_KEY *key); -void EC_KEY_set_enc_flags(EC_KEY *, unsigned int); -point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *); -void EC_KEY_set_conv_form(EC_KEY *, point_conversion_form_t); +void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags); +point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); +void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); /* functions to set/get method specific data */ -void *EC_KEY_get_key_method_data(EC_KEY *, +void *EC_KEY_get_key_method_data(EC_KEY *key, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); -void EC_KEY_insert_key_method_data(EC_KEY *, void *data, +/** Sets the key method data of an EC_KEY object, if none has yet been set. + * \param key EC_KEY object + * \param data opaque data to install. + * \param dup_func a function that duplicates |data|. + * \param free_func a function that frees |data|. + * \param clear_free_func a function that wipes and frees |data|. + * \return the previously set data pointer, or NULL if |data| was inserted. + */ +void *EC_KEY_insert_key_method_data(EC_KEY *key, void *data, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); /* wrapper functions for the underlying EC_GROUP object */ -void EC_KEY_set_asn1_flag(EC_KEY *, int); +void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag); + +/** Sets whether ECDSA operations with the given key will calculate their k + * value from SHA512(private_key + message + random) in order to protect + * against a weak PRNG. + * \param on Whether to calculate k from a hash or not + */ +void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on); + +/** Returns the value of nonce_from_hash + */ +int EC_KEY_get_nonce_from_hash(const EC_KEY *key); /** Creates a table of pre-computed multiples of the generator to * accelerate further EC_KEY operations. @@ -799,6 +850,15 @@ int EC_KEY_generate_key(EC_KEY *key); */ int EC_KEY_check_key(const EC_KEY *key); +/** Sets a public key from affine coordindates performing + * neccessary NIST PKV tests. + * \param key the EC_KEY object + * \param x public key x coordinate + * \param y public key y coordinate + * \return 1 on success and 0 otherwise. + */ +int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y); + /********************************************************************/ /* de- and encoding functions for SEC1 ECPrivateKey */ @@ -926,6 +986,7 @@ void ERR_load_EC_strings(void); /* Error codes for the EC functions. */ /* Function codes. */ +#define EC_F_BN_TO_FELEM 224 #define EC_F_COMPUTE_WNAF 143 #define EC_F_D2I_ECPARAMETERS 144 #define EC_F_D2I_ECPKPARAMETERS 145 @@ -968,6 +1029,15 @@ void ERR_load_EC_strings(void); #define EC_F_EC_GFP_MONT_FIELD_SQR 132 #define EC_F_EC_GFP_MONT_GROUP_SET_CURVE 189 #define EC_F_EC_GFP_MONT_GROUP_SET_CURVE_GFP 135 +#define EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE 225 +#define EC_F_EC_GFP_NISTP224_POINTS_MUL 228 +#define EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES 226 +#define EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE 230 +#define EC_F_EC_GFP_NISTP256_POINTS_MUL 231 +#define EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES 232 +#define EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE 233 +#define EC_F_EC_GFP_NISTP521_POINTS_MUL 234 +#define EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES 235 #define EC_F_EC_GFP_NIST_FIELD_MUL 200 #define EC_F_EC_GFP_NIST_FIELD_SQR 201 #define EC_F_EC_GFP_NIST_GROUP_SET_CURVE 202 @@ -1010,6 +1080,7 @@ void ERR_load_EC_strings(void); #define EC_F_EC_KEY_NEW 182 #define EC_F_EC_KEY_PRINT 180 #define EC_F_EC_KEY_PRINT_FP 181 +#define EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES 229 #define EC_F_EC_POINTS_MAKE_AFFINE 136 #define EC_F_EC_POINT_ADD 112 #define EC_F_EC_POINT_CMP 113 @@ -1040,6 +1111,9 @@ void ERR_load_EC_strings(void); #define EC_F_I2D_ECPKPARAMETERS 191 #define EC_F_I2D_ECPRIVATEKEY 192 #define EC_F_I2O_ECPUBLICKEY 151 +#define EC_F_NISTP224_PRE_COMP_NEW 227 +#define EC_F_NISTP256_PRE_COMP_NEW 236 +#define EC_F_NISTP521_PRE_COMP_NEW 237 #define EC_F_O2I_ECPUBLICKEY 152 #define EC_F_OLD_EC_PRIV_DECODE 222 #define EC_F_PKEY_EC_CTRL 197 @@ -1052,12 +1126,15 @@ void ERR_load_EC_strings(void); /* Reason codes. */ #define EC_R_ASN1_ERROR 115 #define EC_R_ASN1_UNKNOWN_FIELD 116 +#define EC_R_BIGNUM_OUT_OF_RANGE 144 #define EC_R_BUFFER_TOO_SMALL 100 +#define EC_R_COORDINATES_OUT_OF_RANGE 146 #define EC_R_D2I_ECPKPARAMETERS_FAILURE 117 #define EC_R_DECODE_ERROR 142 #define EC_R_DISCRIMINANT_IS_ZERO 118 #define EC_R_EC_GROUP_NEW_BY_NAME_FAILURE 119 #define EC_R_FIELD_TOO_LARGE 143 +#define EC_R_GF2M_NOT_SUPPORTED 147 #define EC_R_GROUP2PKPARAMETERS_FAILURE 120 #define EC_R_I2D_ECPKPARAMETERS_FAILURE 121 #define EC_R_INCOMPATIBLE_OBJECTS 101 @@ -1092,6 +1169,7 @@ void ERR_load_EC_strings(void); #define EC_R_UNKNOWN_GROUP 129 #define EC_R_UNKNOWN_ORDER 114 #define EC_R_UNSUPPORTED_FIELD 131 +#define EC_R_WRONG_CURVE_PARAMETERS 145 #define EC_R_WRONG_ORDER 130 #ifdef __cplusplus diff --git a/app/openssl/include/openssl/ecdh.h b/app/openssl/include/openssl/ecdh.h index b4b58ee6..8887102c 100644 --- a/app/openssl/include/openssl/ecdh.h +++ b/app/openssl/include/openssl/ecdh.h @@ -109,11 +109,13 @@ void ERR_load_ECDH_strings(void); /* Error codes for the ECDH functions. */ /* Function codes. */ +#define ECDH_F_ECDH_CHECK 102 #define ECDH_F_ECDH_COMPUTE_KEY 100 #define ECDH_F_ECDH_DATA_NEW_METHOD 101 /* Reason codes. */ #define ECDH_R_KDF_FAILED 102 +#define ECDH_R_NON_FIPS_METHOD 103 #define ECDH_R_NO_PRIVATE_VALUE 100 #define ECDH_R_POINT_ARITHMETIC_FAILURE 101 diff --git a/app/openssl/include/openssl/ecdsa.h b/app/openssl/include/openssl/ecdsa.h index e61c5398..dc6a36b1 100644 --- a/app/openssl/include/openssl/ecdsa.h +++ b/app/openssl/include/openssl/ecdsa.h @@ -238,6 +238,7 @@ void ERR_load_ECDSA_strings(void); /* Error codes for the ECDSA functions. */ /* Function codes. */ +#define ECDSA_F_ECDSA_CHECK 104 #define ECDSA_F_ECDSA_DATA_NEW_METHOD 100 #define ECDSA_F_ECDSA_DO_SIGN 101 #define ECDSA_F_ECDSA_DO_VERIFY 102 @@ -249,6 +250,8 @@ void ERR_load_ECDSA_strings(void); #define ECDSA_R_ERR_EC_LIB 102 #define ECDSA_R_MISSING_PARAMETERS 103 #define ECDSA_R_NEED_NEW_SETUP_VALUES 106 +#define ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED 108 +#define ECDSA_R_NON_FIPS_METHOD 107 #define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104 #define ECDSA_R_SIGNATURE_MALLOC_FAILED 105 diff --git a/app/openssl/include/openssl/engine.h b/app/openssl/include/openssl/engine.h index 943aeae2..f8be4977 100644 --- a/app/openssl/include/openssl/engine.h +++ b/app/openssl/include/openssl/engine.h @@ -141,6 +141,13 @@ extern "C" { * the existing ENGINE's structural reference count. */ #define ENGINE_FLAGS_BY_ID_COPY (int)0x0004 +/* This flag if for an ENGINE that does not want its methods registered as + * part of ENGINE_register_all_complete() for example if the methods are + * not usable as default methods. + */ + +#define ENGINE_FLAGS_NO_REGISTER_ALL (int)0x0008 + /* ENGINEs can support their own command types, and these flags are used in * ENGINE_CTRL_GET_CMD_FLAGS to indicate to the caller what kind of input each * command expects. Currently only numeric and string input is supported. If a @@ -344,6 +351,8 @@ void ENGINE_load_gost(void); #endif #endif void ENGINE_load_cryptodev(void); +void ENGINE_load_rsax(void); +void ENGINE_load_rdrand(void); void ENGINE_load_builtin_engines(void); /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation diff --git a/app/openssl/include/openssl/err.h b/app/openssl/include/openssl/err.h index b9f8c16d..974cc9cc 100644 --- a/app/openssl/include/openssl/err.h +++ b/app/openssl/include/openssl/err.h @@ -344,8 +344,9 @@ void ERR_print_errors_fp(FILE *fp); #endif #ifndef OPENSSL_NO_BIO void ERR_print_errors(BIO *bp); -void ERR_add_error_data(int num, ...); #endif +void ERR_add_error_data(int num, ...); +void ERR_add_error_vdata(int num, va_list args); void ERR_load_strings(int lib,ERR_STRING_DATA str[]); void ERR_unload_strings(int lib,ERR_STRING_DATA str[]); void ERR_load_ERR_strings(void); diff --git a/app/openssl/include/openssl/evp.h b/app/openssl/include/openssl/evp.h index 9f9795e2..e43a58e6 100644 --- a/app/openssl/include/openssl/evp.h +++ b/app/openssl/include/openssl/evp.h @@ -83,7 +83,7 @@ #define EVP_RC5_32_12_16_KEY_SIZE 16 */ #define EVP_MAX_MD_SIZE 64 /* longest known is SHA512 */ -#define EVP_MAX_KEY_LENGTH 32 +#define EVP_MAX_KEY_LENGTH 64 #define EVP_MAX_IV_LENGTH 16 #define EVP_MAX_BLOCK_LENGTH 32 @@ -116,6 +116,7 @@ #define EVP_PKEY_DH NID_dhKeyAgreement #define EVP_PKEY_EC NID_X9_62_id_ecPublicKey #define EVP_PKEY_HMAC NID_hmac +#define EVP_PKEY_CMAC NID_cmac #ifdef __cplusplus extern "C" { @@ -216,6 +217,8 @@ typedef int evp_verify_method(int type,const unsigned char *m, #define EVP_MD_FLAG_DIGALGID_CUSTOM 0x0018 +#define EVP_MD_FLAG_FIPS 0x0400 /* Note if suitable for use in FIPS mode */ + /* Digest ctrls */ #define EVP_MD_CTRL_DIGALGID 0x1 @@ -325,6 +328,10 @@ struct evp_cipher_st #define EVP_CIPH_CBC_MODE 0x2 #define EVP_CIPH_CFB_MODE 0x3 #define EVP_CIPH_OFB_MODE 0x4 +#define EVP_CIPH_CTR_MODE 0x5 +#define EVP_CIPH_GCM_MODE 0x6 +#define EVP_CIPH_CCM_MODE 0x7 +#define EVP_CIPH_XTS_MODE 0x10001 #define EVP_CIPH_MODE 0xF0007 /* Set if variable length cipher */ #define EVP_CIPH_VARIABLE_LENGTH 0x8 @@ -346,6 +353,15 @@ struct evp_cipher_st #define EVP_CIPH_FLAG_DEFAULT_ASN1 0x1000 /* Buffer length in bits not bytes: CFB1 mode only */ #define EVP_CIPH_FLAG_LENGTH_BITS 0x2000 +/* Note if suitable for use in FIPS mode */ +#define EVP_CIPH_FLAG_FIPS 0x4000 +/* Allow non FIPS cipher in FIPS mode */ +#define EVP_CIPH_FLAG_NON_FIPS_ALLOW 0x8000 +/* Cipher handles any and all padding logic as well + * as finalisation. + */ +#define EVP_CIPH_FLAG_CUSTOM_CIPHER 0x100000 +#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 /* ctrl() values */ @@ -358,6 +374,33 @@ struct evp_cipher_st #define EVP_CTRL_RAND_KEY 0x6 #define EVP_CTRL_PBE_PRF_NID 0x7 #define EVP_CTRL_COPY 0x8 +#define EVP_CTRL_GCM_SET_IVLEN 0x9 +#define EVP_CTRL_GCM_GET_TAG 0x10 +#define EVP_CTRL_GCM_SET_TAG 0x11 +#define EVP_CTRL_GCM_SET_IV_FIXED 0x12 +#define EVP_CTRL_GCM_IV_GEN 0x13 +#define EVP_CTRL_CCM_SET_IVLEN EVP_CTRL_GCM_SET_IVLEN +#define EVP_CTRL_CCM_GET_TAG EVP_CTRL_GCM_GET_TAG +#define EVP_CTRL_CCM_SET_TAG EVP_CTRL_GCM_SET_TAG +#define EVP_CTRL_CCM_SET_L 0x14 +#define EVP_CTRL_CCM_SET_MSGLEN 0x15 +/* AEAD cipher deduces payload length and returns number of bytes + * required to store MAC and eventual padding. Subsequent call to + * EVP_Cipher even appends/verifies MAC. + */ +#define EVP_CTRL_AEAD_TLS1_AAD 0x16 +/* Used by composite AEAD ciphers, no-op in GCM, CCM... */ +#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +/* Set the GCM invocation field, decrypt only */ +#define EVP_CTRL_GCM_SET_IV_INV 0x18 + +/* GCM TLS constants */ +/* Length of fixed part of IV derived from PRF */ +#define EVP_GCM_TLS_FIXED_IV_LEN 4 +/* Length of explicit part of IV part of TLS records */ +#define EVP_GCM_TLS_EXPLICIT_IV_LEN 8 +/* Length of tag for TLS */ +#define EVP_GCM_TLS_TAG_LEN 16 typedef struct evp_cipher_info_st { @@ -375,7 +418,7 @@ struct evp_cipher_ctx_st unsigned char oiv[EVP_MAX_IV_LENGTH]; /* original iv */ unsigned char iv[EVP_MAX_IV_LENGTH]; /* working iv */ unsigned char buf[EVP_MAX_BLOCK_LENGTH];/* saved partial block */ - int num; /* used by cfb/ofb mode */ + int num; /* used by cfb/ofb/ctr mode */ void *app_data; /* application stuff */ int key_len; /* May change for variable length cipher */ @@ -695,6 +738,9 @@ const EVP_MD *EVP_dev_crypto_md5(void); #ifndef OPENSSL_NO_RC4 const EVP_CIPHER *EVP_rc4(void); const EVP_CIPHER *EVP_rc4_40(void); +#ifndef OPENSSL_NO_MD5 +const EVP_CIPHER *EVP_rc4_hmac_md5(void); +#endif #endif #ifndef OPENSSL_NO_IDEA const EVP_CIPHER *EVP_idea_ecb(void); @@ -741,9 +787,10 @@ const EVP_CIPHER *EVP_aes_128_cfb8(void); const EVP_CIPHER *EVP_aes_128_cfb128(void); # define EVP_aes_128_cfb EVP_aes_128_cfb128 const EVP_CIPHER *EVP_aes_128_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_128_ctr(void); -#endif +const EVP_CIPHER *EVP_aes_128_ccm(void); +const EVP_CIPHER *EVP_aes_128_gcm(void); +const EVP_CIPHER *EVP_aes_128_xts(void); const EVP_CIPHER *EVP_aes_192_ecb(void); const EVP_CIPHER *EVP_aes_192_cbc(void); const EVP_CIPHER *EVP_aes_192_cfb1(void); @@ -751,9 +798,9 @@ const EVP_CIPHER *EVP_aes_192_cfb8(void); const EVP_CIPHER *EVP_aes_192_cfb128(void); # define EVP_aes_192_cfb EVP_aes_192_cfb128 const EVP_CIPHER *EVP_aes_192_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_192_ctr(void); -#endif +const EVP_CIPHER *EVP_aes_192_ccm(void); +const EVP_CIPHER *EVP_aes_192_gcm(void); const EVP_CIPHER *EVP_aes_256_ecb(void); const EVP_CIPHER *EVP_aes_256_cbc(void); const EVP_CIPHER *EVP_aes_256_cfb1(void); @@ -761,8 +808,13 @@ const EVP_CIPHER *EVP_aes_256_cfb8(void); const EVP_CIPHER *EVP_aes_256_cfb128(void); # define EVP_aes_256_cfb EVP_aes_256_cfb128 const EVP_CIPHER *EVP_aes_256_ofb(void); -#if 0 const EVP_CIPHER *EVP_aes_256_ctr(void); +const EVP_CIPHER *EVP_aes_256_ccm(void); +const EVP_CIPHER *EVP_aes_256_gcm(void); +const EVP_CIPHER *EVP_aes_256_xts(void); +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void); +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void); #endif #endif #ifndef OPENSSL_NO_CAMELLIA @@ -869,6 +921,7 @@ struct ec_key_st *EVP_PKEY_get1_EC_KEY(EVP_PKEY *pkey); #endif EVP_PKEY * EVP_PKEY_new(void); +EVP_PKEY * EVP_PKEY_dup(EVP_PKEY *pkey); void EVP_PKEY_free(EVP_PKEY *pkey); EVP_PKEY * d2i_PublicKey(int type,EVP_PKEY **a, const unsigned char **pp, @@ -1047,13 +1100,22 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, #define EVP_PKEY_CTRL_CMS_DECRYPT 10 #define EVP_PKEY_CTRL_CMS_SIGN 11 +#define EVP_PKEY_CTRL_CIPHER 12 + #define EVP_PKEY_ALG_CTRL 0x1000 #define EVP_PKEY_FLAG_AUTOARGLEN 2 +/* Method handles all operations: don't assume any digest related + * defaults. + */ +#define EVP_PKEY_FLAG_SIGCTX_CUSTOM 4 const EVP_PKEY_METHOD *EVP_PKEY_meth_find(int type); EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags); +void EVP_PKEY_meth_get0_info(int *ppkey_id, int *pflags, + const EVP_PKEY_METHOD *meth); +void EVP_PKEY_meth_copy(EVP_PKEY_METHOD *dst, const EVP_PKEY_METHOD *src); void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth); int EVP_PKEY_meth_add0(const EVP_PKEY_METHOD *pmeth); @@ -1071,7 +1133,7 @@ int EVP_PKEY_CTX_get_operation(EVP_PKEY_CTX *ctx); void EVP_PKEY_CTX_set0_keygen_info(EVP_PKEY_CTX *ctx, int *dat, int datlen); EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e, - unsigned char *key, int keylen); + const unsigned char *key, int keylen); void EVP_PKEY_CTX_set_data(EVP_PKEY_CTX *ctx, void *data); void *EVP_PKEY_CTX_get_data(EVP_PKEY_CTX *ctx); @@ -1181,6 +1243,8 @@ void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth, int (*ctrl_str)(EVP_PKEY_CTX *ctx, const char *type, const char *value)); +void EVP_add_alg_module(void); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -1190,8 +1254,14 @@ void ERR_load_EVP_strings(void); /* Error codes for the EVP functions. */ /* Function codes. */ +#define EVP_F_AESNI_INIT_KEY 165 +#define EVP_F_AESNI_XTS_CIPHER 176 #define EVP_F_AES_INIT_KEY 133 +#define EVP_F_AES_XTS 172 +#define EVP_F_AES_XTS_CIPHER 175 +#define EVP_F_ALG_MODULE_INIT 177 #define EVP_F_CAMELLIA_INIT_KEY 159 +#define EVP_F_CMAC_INIT 173 #define EVP_F_D2I_PKEY 100 #define EVP_F_DO_SIGVER_INIT 161 #define EVP_F_DSAPKEY2PKCS8 134 @@ -1246,15 +1316,24 @@ void ERR_load_EVP_strings(void); #define EVP_F_EVP_RIJNDAEL 126 #define EVP_F_EVP_SIGNFINAL 107 #define EVP_F_EVP_VERIFYFINAL 108 +#define EVP_F_FIPS_CIPHERINIT 166 +#define EVP_F_FIPS_CIPHER_CTX_COPY 170 +#define EVP_F_FIPS_CIPHER_CTX_CTRL 167 +#define EVP_F_FIPS_CIPHER_CTX_SET_KEY_LENGTH 171 +#define EVP_F_FIPS_DIGESTINIT 168 +#define EVP_F_FIPS_MD_CTX_COPY 169 +#define EVP_F_HMAC_INIT_EX 174 #define EVP_F_INT_CTX_NEW 157 #define EVP_F_PKCS5_PBE_KEYIVGEN 117 #define EVP_F_PKCS5_V2_PBE_KEYIVGEN 118 +#define EVP_F_PKCS5_V2_PBKDF2_KEYIVGEN 164 #define EVP_F_PKCS8_SET_BROKEN 112 #define EVP_F_PKEY_SET_TYPE 158 #define EVP_F_RC2_MAGIC_TO_METH 109 #define EVP_F_RC5_CTRL 125 /* Reason codes. */ +#define EVP_R_AES_IV_SETUP_FAILED 162 #define EVP_R_AES_KEY_SETUP_FAILED 143 #define EVP_R_ASN1_LIB 140 #define EVP_R_BAD_BLOCK_LENGTH 136 @@ -1272,16 +1351,21 @@ void ERR_load_EVP_strings(void); #define EVP_R_DECODE_ERROR 114 #define EVP_R_DIFFERENT_KEY_TYPES 101 #define EVP_R_DIFFERENT_PARAMETERS 153 +#define EVP_R_DISABLED_FOR_FIPS 163 #define EVP_R_ENCODE_ERROR 115 +#define EVP_R_ERROR_LOADING_SECTION 165 +#define EVP_R_ERROR_SETTING_FIPS_MODE 166 #define EVP_R_EVP_PBE_CIPHERINIT_ERROR 119 #define EVP_R_EXPECTING_AN_RSA_KEY 127 #define EVP_R_EXPECTING_A_DH_KEY 128 #define EVP_R_EXPECTING_A_DSA_KEY 129 #define EVP_R_EXPECTING_A_ECDSA_KEY 141 #define EVP_R_EXPECTING_A_EC_KEY 142 +#define EVP_R_FIPS_MODE_NOT_SUPPORTED 167 #define EVP_R_INITIALIZATION_ERROR 134 #define EVP_R_INPUT_NOT_INITIALIZED 111 #define EVP_R_INVALID_DIGEST 152 +#define EVP_R_INVALID_FIPS_MODE 168 #define EVP_R_INVALID_KEY_LENGTH 130 #define EVP_R_INVALID_OPERATION 148 #define EVP_R_IV_TOO_LARGE 102 @@ -1303,8 +1387,10 @@ void ERR_load_EVP_strings(void); #define EVP_R_PRIVATE_KEY_DECODE_ERROR 145 #define EVP_R_PRIVATE_KEY_ENCODE_ERROR 146 #define EVP_R_PUBLIC_KEY_NOT_RSA 106 +#define EVP_R_TOO_LARGE 164 #define EVP_R_UNKNOWN_CIPHER 160 #define EVP_R_UNKNOWN_DIGEST 161 +#define EVP_R_UNKNOWN_OPTION 169 #define EVP_R_UNKNOWN_PBE_ALGORITHM 121 #define EVP_R_UNSUPORTED_NUMBER_OF_ROUNDS 135 #define EVP_R_UNSUPPORTED_ALGORITHM 156 diff --git a/app/openssl/include/openssl/kssl.h b/app/openssl/include/openssl/kssl.h index a3d20e1c..e4df8430 100644 --- a/app/openssl/include/openssl/kssl.h +++ b/app/openssl/include/openssl/kssl.h @@ -70,6 +70,15 @@ #include #include #include +#ifdef OPENSSL_SYS_WIN32 +/* These can sometimes get redefined indirectly by krb5 header files + * after they get undefed in ossl_typ.h + */ +#undef X509_NAME +#undef X509_EXTENSIONS +#undef OCSP_REQUEST +#undef OCSP_RESPONSE +#endif #ifdef __cplusplus extern "C" { @@ -172,6 +181,10 @@ krb5_error_code kssl_check_authent(KSSL_CTX *kssl_ctx, krb5_data *authentp, krb5_timestamp *atimep, KSSL_ERR *kssl_err); unsigned char *kssl_skip_confound(krb5_enctype enctype, unsigned char *authn); +void SSL_set0_kssl_ctx(SSL *s, KSSL_CTX *kctx); +KSSL_CTX * SSL_get0_kssl_ctx(SSL *s); +char *kssl_ctx_get0_client_princ(KSSL_CTX *kctx); + #ifdef __cplusplus } #endif diff --git a/app/openssl/include/openssl/md4.h b/app/openssl/include/openssl/md4.h index c3ed9b3f..a55368a7 100644 --- a/app/openssl/include/openssl/md4.h +++ b/app/openssl/include/openssl/md4.h @@ -105,6 +105,9 @@ typedef struct MD4state_st unsigned int num; } MD4_CTX; +#ifdef OPENSSL_FIPS +int private_MD4_Init(MD4_CTX *c); +#endif int MD4_Init(MD4_CTX *c); int MD4_Update(MD4_CTX *c, const void *data, size_t len); int MD4_Final(unsigned char *md, MD4_CTX *c); diff --git a/app/openssl/include/openssl/md5.h b/app/openssl/include/openssl/md5.h index 4cbf8438..541cc925 100644 --- a/app/openssl/include/openssl/md5.h +++ b/app/openssl/include/openssl/md5.h @@ -105,6 +105,9 @@ typedef struct MD5state_st unsigned int num; } MD5_CTX; +#ifdef OPENSSL_FIPS +int private_MD5_Init(MD5_CTX *c); +#endif int MD5_Init(MD5_CTX *c); int MD5_Update(MD5_CTX *c, const void *data, size_t len); int MD5_Final(unsigned char *md, MD5_CTX *c); diff --git a/app/openssl/include/openssl/modes.h b/app/openssl/include/openssl/modes.h index af8d97d7..f18215bb 100644 --- a/app/openssl/include/openssl/modes.h +++ b/app/openssl/include/openssl/modes.h @@ -15,6 +15,14 @@ typedef void (*cbc128_f)(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], int enc); +typedef void (*ctr128_f)(const unsigned char *in, unsigned char *out, + size_t blocks, const void *key, + const unsigned char ivec[16]); + +typedef void (*ccm128_f)(const unsigned char *in, unsigned char *out, + size_t blocks, const void *key, + const unsigned char ivec[16],unsigned char cmac[16]); + void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], block128_f block); @@ -27,6 +35,11 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned char ecount_buf[16], unsigned int *num, block128_f block); +void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], unsigned char ecount_buf[16], + unsigned int *num, ctr128_f ctr); + void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], int *num, @@ -57,3 +70,66 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out, size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], cbc128_f cbc); + +size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], block128_f block); +size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], cbc128_f cbc); +size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], block128_f block); +size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], cbc128_f cbc); + +typedef struct gcm128_context GCM128_CONTEXT; + +GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block); +void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block); +void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, + size_t len); +int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, + size_t len); +int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len); +int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len); +int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len, ctr128_f stream); +int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len, ctr128_f stream); +int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, + size_t len); +void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len); +void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx); + +typedef struct ccm128_context CCM128_CONTEXT; + +void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, + unsigned int M, unsigned int L, void *key,block128_f block); +int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, + const unsigned char *nonce, size_t nlen, size_t mlen); +void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, + const unsigned char *aad, size_t alen); +int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, size_t len); +int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, size_t len); +int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, size_t len, + ccm128_f stream); +int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, size_t len, + ccm128_f stream); +size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len); + +typedef struct xts128_context XTS128_CONTEXT; + +int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], + const unsigned char *inp, unsigned char *out, size_t len, int enc); diff --git a/app/openssl/include/openssl/obj_mac.h b/app/openssl/include/openssl/obj_mac.h index 282f11a8..b5ea7cda 100644 --- a/app/openssl/include/openssl/obj_mac.h +++ b/app/openssl/include/openssl/obj_mac.h @@ -580,6 +580,21 @@ #define NID_sha1WithRSAEncryption 65 #define OBJ_sha1WithRSAEncryption OBJ_pkcs1,5L +#define SN_rsaesOaep "RSAES-OAEP" +#define LN_rsaesOaep "rsaesOaep" +#define NID_rsaesOaep 919 +#define OBJ_rsaesOaep OBJ_pkcs1,7L + +#define SN_mgf1 "MGF1" +#define LN_mgf1 "mgf1" +#define NID_mgf1 911 +#define OBJ_mgf1 OBJ_pkcs1,8L + +#define SN_rsassaPss "RSASSA-PSS" +#define LN_rsassaPss "rsassaPss" +#define NID_rsassaPss 912 +#define OBJ_rsassaPss OBJ_pkcs1,10L + #define SN_sha256WithRSAEncryption "RSA-SHA256" #define LN_sha256WithRSAEncryption "sha256WithRSAEncryption" #define NID_sha256WithRSAEncryption 668 @@ -981,6 +996,10 @@ #define NID_id_smime_alg_CMSRC2wrap 247 #define OBJ_id_smime_alg_CMSRC2wrap OBJ_id_smime_alg,7L +#define SN_id_alg_PWRI_KEK "id-alg-PWRI-KEK" +#define NID_id_alg_PWRI_KEK 893 +#define OBJ_id_alg_PWRI_KEK OBJ_id_smime_alg,9L + #define SN_id_smime_cd_ldap "id-smime-cd-ldap" #define NID_id_smime_cd_ldap 248 #define OBJ_id_smime_cd_ldap OBJ_id_smime_cd,1L @@ -2399,6 +2418,11 @@ #define NID_no_rev_avail 403 #define OBJ_no_rev_avail OBJ_id_ce,56L +#define SN_anyExtendedKeyUsage "anyExtendedKeyUsage" +#define LN_anyExtendedKeyUsage "Any Extended Key Usage" +#define NID_anyExtendedKeyUsage 910 +#define OBJ_anyExtendedKeyUsage OBJ_ext_key_usage,0L + #define SN_netscape "Netscape" #define LN_netscape "Netscape Communications Corp." #define NID_netscape 57 @@ -2586,6 +2610,24 @@ #define NID_aes_128_cfb128 421 #define OBJ_aes_128_cfb128 OBJ_aes,4L +#define SN_id_aes128_wrap "id-aes128-wrap" +#define NID_id_aes128_wrap 788 +#define OBJ_id_aes128_wrap OBJ_aes,5L + +#define SN_aes_128_gcm "id-aes128-GCM" +#define LN_aes_128_gcm "aes-128-gcm" +#define NID_aes_128_gcm 895 +#define OBJ_aes_128_gcm OBJ_aes,6L + +#define SN_aes_128_ccm "id-aes128-CCM" +#define LN_aes_128_ccm "aes-128-ccm" +#define NID_aes_128_ccm 896 +#define OBJ_aes_128_ccm OBJ_aes,7L + +#define SN_id_aes128_wrap_pad "id-aes128-wrap-pad" +#define NID_id_aes128_wrap_pad 897 +#define OBJ_id_aes128_wrap_pad OBJ_aes,8L + #define SN_aes_192_ecb "AES-192-ECB" #define LN_aes_192_ecb "aes-192-ecb" #define NID_aes_192_ecb 422 @@ -2606,6 +2648,24 @@ #define NID_aes_192_cfb128 425 #define OBJ_aes_192_cfb128 OBJ_aes,24L +#define SN_id_aes192_wrap "id-aes192-wrap" +#define NID_id_aes192_wrap 789 +#define OBJ_id_aes192_wrap OBJ_aes,25L + +#define SN_aes_192_gcm "id-aes192-GCM" +#define LN_aes_192_gcm "aes-192-gcm" +#define NID_aes_192_gcm 898 +#define OBJ_aes_192_gcm OBJ_aes,26L + +#define SN_aes_192_ccm "id-aes192-CCM" +#define LN_aes_192_ccm "aes-192-ccm" +#define NID_aes_192_ccm 899 +#define OBJ_aes_192_ccm OBJ_aes,27L + +#define SN_id_aes192_wrap_pad "id-aes192-wrap-pad" +#define NID_id_aes192_wrap_pad 900 +#define OBJ_id_aes192_wrap_pad OBJ_aes,28L + #define SN_aes_256_ecb "AES-256-ECB" #define LN_aes_256_ecb "aes-256-ecb" #define NID_aes_256_ecb 426 @@ -2626,6 +2686,24 @@ #define NID_aes_256_cfb128 429 #define OBJ_aes_256_cfb128 OBJ_aes,44L +#define SN_id_aes256_wrap "id-aes256-wrap" +#define NID_id_aes256_wrap 790 +#define OBJ_id_aes256_wrap OBJ_aes,45L + +#define SN_aes_256_gcm "id-aes256-GCM" +#define LN_aes_256_gcm "aes-256-gcm" +#define NID_aes_256_gcm 901 +#define OBJ_aes_256_gcm OBJ_aes,46L + +#define SN_aes_256_ccm "id-aes256-CCM" +#define LN_aes_256_ccm "aes-256-ccm" +#define NID_aes_256_ccm 902 +#define OBJ_aes_256_ccm OBJ_aes,47L + +#define SN_id_aes256_wrap_pad "id-aes256-wrap-pad" +#define NID_id_aes256_wrap_pad 903 +#define OBJ_id_aes256_wrap_pad OBJ_aes,48L + #define SN_aes_128_cfb1 "AES-128-CFB1" #define LN_aes_128_cfb1 "aes-128-cfb1" #define NID_aes_128_cfb1 650 @@ -2650,6 +2728,26 @@ #define LN_aes_256_cfb8 "aes-256-cfb8" #define NID_aes_256_cfb8 655 +#define SN_aes_128_ctr "AES-128-CTR" +#define LN_aes_128_ctr "aes-128-ctr" +#define NID_aes_128_ctr 904 + +#define SN_aes_192_ctr "AES-192-CTR" +#define LN_aes_192_ctr "aes-192-ctr" +#define NID_aes_192_ctr 905 + +#define SN_aes_256_ctr "AES-256-CTR" +#define LN_aes_256_ctr "aes-256-ctr" +#define NID_aes_256_ctr 906 + +#define SN_aes_128_xts "AES-128-XTS" +#define LN_aes_128_xts "aes-128-xts" +#define NID_aes_128_xts 913 + +#define SN_aes_256_xts "AES-256-XTS" +#define LN_aes_256_xts "aes-256-xts" +#define NID_aes_256_xts 914 + #define SN_des_cfb1 "DES-CFB1" #define LN_des_cfb1 "des-cfb1" #define NID_des_cfb1 656 @@ -2666,18 +2764,6 @@ #define LN_des_ede3_cfb8 "des-ede3-cfb8" #define NID_des_ede3_cfb8 659 -#define SN_id_aes128_wrap "id-aes128-wrap" -#define NID_id_aes128_wrap 788 -#define OBJ_id_aes128_wrap OBJ_aes,5L - -#define SN_id_aes192_wrap "id-aes192-wrap" -#define NID_id_aes192_wrap 789 -#define OBJ_id_aes192_wrap OBJ_aes,25L - -#define SN_id_aes256_wrap "id-aes256-wrap" -#define NID_id_aes256_wrap 790 -#define OBJ_id_aes256_wrap OBJ_aes,45L - #define OBJ_nist_hashalgs OBJ_nistAlgorithms,2L #define SN_sha256 "SHA256" @@ -3810,6 +3896,18 @@ #define NID_camellia_256_cbc 753 #define OBJ_camellia_256_cbc 1L,2L,392L,200011L,61L,1L,1L,1L,4L +#define SN_id_camellia128_wrap "id-camellia128-wrap" +#define NID_id_camellia128_wrap 907 +#define OBJ_id_camellia128_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,2L + +#define SN_id_camellia192_wrap "id-camellia192-wrap" +#define NID_id_camellia192_wrap 908 +#define OBJ_id_camellia192_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,3L + +#define SN_id_camellia256_wrap "id-camellia256-wrap" +#define NID_id_camellia256_wrap 909 +#define OBJ_id_camellia256_wrap 1L,2L,392L,200011L,61L,1L,1L,3L,4L + #define OBJ_ntt_ds 0L,3L,4401L,5L #define OBJ_camellia OBJ_ntt_ds,3L,1L,9L @@ -3912,3 +4010,23 @@ #define LN_hmac "hmac" #define NID_hmac 855 +#define SN_cmac "CMAC" +#define LN_cmac "cmac" +#define NID_cmac 894 + +#define SN_rc4_hmac_md5 "RC4-HMAC-MD5" +#define LN_rc4_hmac_md5 "rc4-hmac-md5" +#define NID_rc4_hmac_md5 915 + +#define SN_aes_128_cbc_hmac_sha1 "AES-128-CBC-HMAC-SHA1" +#define LN_aes_128_cbc_hmac_sha1 "aes-128-cbc-hmac-sha1" +#define NID_aes_128_cbc_hmac_sha1 916 + +#define SN_aes_192_cbc_hmac_sha1 "AES-192-CBC-HMAC-SHA1" +#define LN_aes_192_cbc_hmac_sha1 "aes-192-cbc-hmac-sha1" +#define NID_aes_192_cbc_hmac_sha1 917 + +#define SN_aes_256_cbc_hmac_sha1 "AES-256-CBC-HMAC-SHA1" +#define LN_aes_256_cbc_hmac_sha1 "aes-256-cbc-hmac-sha1" +#define NID_aes_256_cbc_hmac_sha1 918 + diff --git a/app/openssl/include/openssl/ocsp.h b/app/openssl/include/openssl/ocsp.h index 31e45744..f14e9f7e 100644 --- a/app/openssl/include/openssl/ocsp.h +++ b/app/openssl/include/openssl/ocsp.h @@ -90,6 +90,13 @@ extern "C" { #define OCSP_RESPID_KEY 0x400 #define OCSP_NOTIME 0x800 +#ifdef OPENSSL_SYS_WIN32 + /* Under Win32 these are defined in wincrypt.h */ +#undef OCSP_REQUEST +#undef X509_NAME +#undef OCSP_RESPONSE +#endif + /* CertID ::= SEQUENCE { * hashAlgorithm AlgorithmIdentifier, * issuerNameHash OCTET STRING, -- Hash of Issuer's DN diff --git a/app/openssl/include/openssl/opensslconf-32.h b/app/openssl/include/openssl/opensslconf-32.h new file mode 100644 index 00000000..d6625489 --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-32.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#define BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf-64.h b/app/openssl/include/openssl/opensslconf-64.h new file mode 100644 index 00000000..70c5a2cb --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-64.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf-static-32.h b/app/openssl/include/openssl/opensslconf-static-32.h new file mode 100644 index 00000000..d6625489 --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-static-32.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#define BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf-static-64.h b/app/openssl/include/openssl/opensslconf-static-64.h new file mode 100644 index 00000000..70c5a2cb --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-static-64.h @@ -0,0 +1,316 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf-static-trusty.h b/app/openssl/include/openssl/opensslconf-static-trusty.h new file mode 100644 index 00000000..06f9f982 --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-static-trusty.h @@ -0,0 +1,448 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_CMS +# define OPENSSL_NO_CMS +#endif +#ifndef OPENSSL_NO_COMP +# define OPENSSL_NO_COMP +#endif +#ifndef OPENSSL_NO_CONF +# define OPENSSL_NO_CONF +#endif +#ifndef OPENSSL_NO_DES +# define OPENSSL_NO_DES +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_LOCKING +# define OPENSSL_NO_LOCKING +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MD4 +# define OPENSSL_NO_MD4 +#endif +#ifndef OPENSSL_NO_MD5 +# define OPENSSL_NO_MD5 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_OCSP +# define OPENSSL_NO_OCSP +#endif +#ifndef OPENSSL_NO_PEM +# define OPENSSL_NO_PEM +#endif +#ifndef OPENSSL_NO_PKCS12 +# define OPENSSL_NO_PKCS12 +#endif +#ifndef OPENSSL_NO_PQUEUE +# define OPENSSL_NO_PQUEUE +#endif +#ifndef OPENSSL_NO_RC2 +# define OPENSSL_NO_RC2 +#endif +#ifndef OPENSSL_NO_RC4 +# define OPENSSL_NO_RC4 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_SRP +# define OPENSSL_NO_SRP +#endif +#ifndef OPENSSL_NO_SSL2 +# define OPENSSL_NO_SSL2 +#endif +#ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_TLS1 +# define OPENSSL_NO_TLS1 +#endif +#ifndef OPENSSL_NO_TLSEXT +# define OPENSSL_NO_TLSEXT +#endif +#ifndef OPENSSL_NO_TS +# define OPENSSL_NO_TS +#endif +#ifndef OPENSSL_NO_TXT_DB +# define OPENSSL_NO_TXT_DB +#endif +#ifndef OPENSSL_NO_UI +# define OPENSSL_NO_UI +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_NO_ERR +# define OPENSSL_NO_ERR +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS) +# define NO_CMS +# endif +# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP) +# define NO_COMP +# endif +# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF) +# define NO_CONF +# endif +# if defined(OPENSSL_NO_DES) && !defined(NO_DES) +# define NO_DES +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING) +# define NO_LOCKING +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4) +# define NO_MD4 +# endif +# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5) +# define NO_MD5 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP) +# define NO_OCSP +# endif +# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM) +# define NO_PEM +# endif +# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12) +# define NO_PKCS12 +# endif +# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE) +# define NO_PQUEUE +# endif +# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2) +# define NO_RC2 +# endif +# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4) +# define NO_RC4 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP) +# define NO_SRP +# endif +# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2) +# define NO_SSL2 +# endif +# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3) +# define NO_SSL3 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1) +# define NO_TLS1 +# endif +# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT) +# define NO_TLSEXT +# endif +# if defined(OPENSSL_NO_TS) && !defined(NO_TS) +# define NO_TS +# endif +# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB) +# define NO_TXT_DB +# endif +# if defined(OPENSSL_NO_UI) && !defined(NO_UI) +# define NO_UI +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned int +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#undef RC4_CHUNK +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned long +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#undef BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#undef DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf-static.h b/app/openssl/include/openssl/opensslconf-static.h new file mode 100644 index 00000000..f63a6e0a --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-static.h @@ -0,0 +1,6 @@ +// Auto-generated - DO NOT EDIT! +#if defined(__LP64__) +#include "opensslconf-static-64.h" +#else +#include "opensslconf-static-32.h" +#endif diff --git a/app/openssl/include/openssl/opensslconf-trusty.h b/app/openssl/include/openssl/opensslconf-trusty.h new file mode 100644 index 00000000..06f9f982 --- /dev/null +++ b/app/openssl/include/openssl/opensslconf-trusty.h @@ -0,0 +1,448 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_CAMELLIA +# define OPENSSL_NO_CAMELLIA +#endif +#ifndef OPENSSL_NO_CAPIENG +# define OPENSSL_NO_CAPIENG +#endif +#ifndef OPENSSL_NO_CAST +# define OPENSSL_NO_CAST +#endif +#ifndef OPENSSL_NO_CMS +# define OPENSSL_NO_CMS +#endif +#ifndef OPENSSL_NO_COMP +# define OPENSSL_NO_COMP +#endif +#ifndef OPENSSL_NO_CONF +# define OPENSSL_NO_CONF +#endif +#ifndef OPENSSL_NO_DES +# define OPENSSL_NO_DES +#endif +#ifndef OPENSSL_NO_DTLS1 +# define OPENSSL_NO_DTLS1 +#endif +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_GOST +# define OPENSSL_NO_GOST +#endif +#ifndef OPENSSL_NO_HEARTBEATS +# define OPENSSL_NO_HEARTBEATS +#endif +#ifndef OPENSSL_NO_IDEA +# define OPENSSL_NO_IDEA +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_LOCKING +# define OPENSSL_NO_LOCKING +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_MD4 +# define OPENSSL_NO_MD4 +#endif +#ifndef OPENSSL_NO_MD5 +# define OPENSSL_NO_MD5 +#endif +#ifndef OPENSSL_NO_MDC2 +# define OPENSSL_NO_MDC2 +#endif +#ifndef OPENSSL_NO_OCSP +# define OPENSSL_NO_OCSP +#endif +#ifndef OPENSSL_NO_PEM +# define OPENSSL_NO_PEM +#endif +#ifndef OPENSSL_NO_PKCS12 +# define OPENSSL_NO_PKCS12 +#endif +#ifndef OPENSSL_NO_PQUEUE +# define OPENSSL_NO_PQUEUE +#endif +#ifndef OPENSSL_NO_RC2 +# define OPENSSL_NO_RC2 +#endif +#ifndef OPENSSL_NO_RC4 +# define OPENSSL_NO_RC4 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RDRAND +# define OPENSSL_NO_RDRAND +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_RIPEMD +# define OPENSSL_NO_RIPEMD +#endif +#ifndef OPENSSL_NO_RSAX +# define OPENSSL_NO_RSAX +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SEED +# define OPENSSL_NO_SEED +#endif +#ifndef OPENSSL_NO_SHA0 +# define OPENSSL_NO_SHA0 +#endif +#ifndef OPENSSL_NO_SRP +# define OPENSSL_NO_SRP +#endif +#ifndef OPENSSL_NO_SSL2 +# define OPENSSL_NO_SSL2 +#endif +#ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +#endif +#ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_TLS1 +# define OPENSSL_NO_TLS1 +#endif +#ifndef OPENSSL_NO_TLSEXT +# define OPENSSL_NO_TLSEXT +#endif +#ifndef OPENSSL_NO_TS +# define OPENSSL_NO_TS +#endif +#ifndef OPENSSL_NO_TXT_DB +# define OPENSSL_NO_TXT_DB +#endif +#ifndef OPENSSL_NO_UI +# define OPENSSL_NO_UI +#endif +#ifndef OPENSSL_NO_WHIRLPOOL +# define OPENSSL_NO_WHIRLPOOL +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_NO_ERR +# define OPENSSL_NO_ERR +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. */ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA) +# define NO_CAMELLIA +# endif +# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG) +# define NO_CAPIENG +# endif +# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) +# define NO_CAST +# endif +# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS) +# define NO_CMS +# endif +# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP) +# define NO_COMP +# endif +# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF) +# define NO_CONF +# endif +# if defined(OPENSSL_NO_DES) && !defined(NO_DES) +# define NO_DES +# endif +# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1) +# define NO_DTLS1 +# endif +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST) +# define NO_GOST +# endif +# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS) +# define NO_HEARTBEATS +# endif +# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) +# define NO_IDEA +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING) +# define NO_LOCKING +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4) +# define NO_MD4 +# endif +# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5) +# define NO_MD5 +# endif +# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2) +# define NO_MDC2 +# endif +# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP) +# define NO_OCSP +# endif +# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM) +# define NO_PEM +# endif +# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12) +# define NO_PKCS12 +# endif +# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE) +# define NO_PQUEUE +# endif +# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2) +# define NO_RC2 +# endif +# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4) +# define NO_RC4 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND) +# define NO_RDRAND +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD) +# define NO_RIPEMD +# endif +# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX) +# define NO_RSAX +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) +# define NO_SEED +# endif +# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) +# define NO_SHA0 +# endif +# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP) +# define NO_SRP +# endif +# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2) +# define NO_SSL2 +# endif +# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3) +# define NO_SSL3 +# endif +# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE) +# define NO_STATIC_ENGINE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1) +# define NO_TLS1 +# endif +# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT) +# define NO_TLSEXT +# endif +# if defined(OPENSSL_NO_TS) && !defined(NO_TS) +# define NO_TS +# endif +# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB) +# define NO_TXT_DB +# endif +# if defined(OPENSSL_NO_UI) && !defined(NO_UI) +# define NO_UI +# endif +# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL) +# define NO_WHIRLPOOL +# endif +#endif + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned int +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#undef RC4_CHUNK +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned long +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#undef SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#define THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#undef BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#undef DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslconf.h b/app/openssl/include/openssl/opensslconf.h index 26ac6ba3..94212a08 100644 --- a/app/openssl/include/openssl/opensslconf.h +++ b/app/openssl/include/openssl/opensslconf.h @@ -1,250 +1,10 @@ -/* opensslconf.h */ -/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ - -/* OpenSSL was configured with the following options: */ -#ifndef OPENSSL_DOING_MAKEDEPEND - - -#ifndef OPENSSL_NO_CAST -# define OPENSSL_NO_CAST +// Auto-generated - DO NOT EDIT! +#ifndef OPENSSL_SYS_TRUSTY +#if defined(__LP64__) +#include "opensslconf-64.h" +#else +#include "opensslconf-32.h" #endif -#ifndef OPENSSL_NO_GMP -# define OPENSSL_NO_GMP +#else +#include "opensslconf-trusty.h" #endif -#ifndef OPENSSL_NO_IDEA -# define OPENSSL_NO_IDEA -#endif -#ifndef OPENSSL_NO_JPAKE -# define OPENSSL_NO_JPAKE -#endif -#ifndef OPENSSL_NO_KRB5 -# define OPENSSL_NO_KRB5 -#endif -#ifndef OPENSSL_NO_MD2 -# define OPENSSL_NO_MD2 -#endif -#ifndef OPENSSL_NO_RC5 -# define OPENSSL_NO_RC5 -#endif -#ifndef OPENSSL_NO_RFC3779 -# define OPENSSL_NO_RFC3779 -#endif -#ifndef OPENSSL_NO_SEED -# define OPENSSL_NO_SEED -#endif -#ifndef OPENSSL_NO_SHA0 -# define OPENSSL_NO_SHA0 -#endif -#ifndef OPENSSL_NO_STORE -# define OPENSSL_NO_STORE -#endif -#ifndef OPENSSL_NO_WHRLPOOL -# define OPENSSL_NO_WHRLPOOL -#endif - -#endif /* OPENSSL_DOING_MAKEDEPEND */ - -#ifndef OPENSSL_THREADS -# define OPENSSL_THREADS -#endif -#ifndef OPENSSL_NO_DYNAMIC_ENGINE -# define OPENSSL_NO_DYNAMIC_ENGINE -#endif - -/* The OPENSSL_NO_* macros are also defined as NO_* if the application - asks for it. This is a transient feature that is provided for those - who haven't had the time to do the appropriate changes in their - applications. */ -#ifdef OPENSSL_ALGORITHM_DEFINES -# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST) -# define NO_CAST -# endif -# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) -# define NO_GMP -# endif -# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA) -# define NO_IDEA -# endif -# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) -# define NO_JPAKE -# endif -# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) -# define NO_KRB5 -# endif -# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) -# define NO_MD2 -# endif -# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) -# define NO_RC5 -# endif -# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) -# define NO_RFC3779 -# endif -# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED) -# define NO_SEED -# endif -# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0) -# define NO_SHA0 -# endif -# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) -# define NO_STORE -# endif -# if defined(OPENSSL_NO_WHRLPOOL) && !defined(NO_WHRLPOOL) -# define NO_WHRLPOOL -# endif -#endif - -/* crypto/opensslconf.h.in */ - -/* Generate 80386 code? */ -#undef I386_ONLY - -#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ -#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) -#define ENGINESDIR "/usr/local/ssl/lib/engines" -#define OPENSSLDIR "/usr/local/ssl" -#endif -#endif - -#undef OPENSSL_UNISTD -#define OPENSSL_UNISTD - -#undef OPENSSL_EXPORT_VAR_AS_FUNCTION - -#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) -#define IDEA_INT unsigned int -#endif - -#if defined(HEADER_MD2_H) && !defined(MD2_INT) -#define MD2_INT unsigned int -#endif - -#if defined(HEADER_RC2_H) && !defined(RC2_INT) -/* I need to put in a mod for the alpha - eay */ -#define RC2_INT unsigned int -#endif - -#if defined(HEADER_RC4_H) -#if !defined(RC4_INT) -/* using int types make the structure larger but make the code faster - * on most boxes I have tested - up to %20 faster. */ -/* - * I don't know what does "most" mean, but declaring "int" is a must on: - * - Intel P6 because partial register stalls are very expensive; - * - elder Alpha because it lacks byte load/store instructions; - */ -#define RC4_INT unsigned char -#endif -#if !defined(RC4_CHUNK) -/* - * This enables code handling data aligned at natural CPU word - * boundary. See crypto/rc4/rc4_enc.c for further details. - */ -#define RC4_CHUNK unsigned long -#endif -#endif - -#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) -/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a - * %20 speed up (longs are 8 bytes, int's are 4). */ -#ifndef DES_LONG -#define DES_LONG unsigned int -#endif -#endif - -#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) -#define CONFIG_HEADER_BN_H -#define BN_LLONG - -/* Should we define BN_DIV2W here? */ - -/* Only one for the following should be defined */ -#undef SIXTY_FOUR_BIT_LONG -#undef SIXTY_FOUR_BIT -#define THIRTY_TWO_BIT -#endif - -#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) -#define CONFIG_HEADER_RC4_LOCL_H -/* if this is defined data[i] is used instead of *data, this is a %20 - * speedup on x86 */ -#undef RC4_INDEX -#endif - -#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) -#define CONFIG_HEADER_BF_LOCL_H -#define BF_PTR -#endif /* HEADER_BF_LOCL_H */ - -#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) -#define CONFIG_HEADER_DES_LOCL_H -#ifndef DES_DEFAULT_OPTIONS -/* the following is tweaked from a config script, that is why it is a - * protected undef/define */ -#ifndef DES_PTR -#undef DES_PTR -#endif - -/* This helps C compiler generate the correct code for multiple functional - * units. It reduces register dependancies at the expense of 2 more - * registers */ -#ifndef DES_RISC1 -#undef DES_RISC1 -#endif - -#ifndef DES_RISC2 -#undef DES_RISC2 -#endif - -#if defined(DES_RISC1) && defined(DES_RISC2) -YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! -#endif - -/* Unroll the inner loop, this sometimes helps, sometimes hinders. - * Very mucy CPU dependant */ -#ifndef DES_UNROLL -#define DES_UNROLL -#endif - -/* These default values were supplied by - * Peter Gutman - * They are only used if nothing else has been defined */ -#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) -/* Special defines which change the way the code is built depending on the - CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find - even newer MIPS CPU's, but at the moment one size fits all for - optimization options. Older Sparc's work better with only UNROLL, but - there's no way to tell at compile time what it is you're running on */ - -#if defined( sun ) /* Newer Sparc's */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#elif defined( __ultrix ) /* Older MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined( __osf1__ ) /* Alpha */ -# define DES_PTR -# define DES_RISC2 -#elif defined ( _AIX ) /* RS6000 */ - /* Unknown */ -#elif defined( __hpux ) /* HP-PA */ - /* Unknown */ -#elif defined( __aux ) /* 68K */ - /* Unknown */ -#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ -# define DES_UNROLL -#elif defined( __sgi ) /* Newer MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#endif /* Systems-specific speed defines */ -#endif - -#endif /* DES_DEFAULT_OPTIONS */ -#endif /* HEADER_DES_LOCL_H */ diff --git a/app/openssl/include/openssl/opensslv.h b/app/openssl/include/openssl/opensslv.h index 310a3387..ebe71807 100644 --- a/app/openssl/include/openssl/opensslv.h +++ b/app/openssl/include/openssl/opensslv.h @@ -25,11 +25,11 @@ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x1000005fL +#define OPENSSL_VERSION_NUMBER 0x1000107fL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0e-fips 6 Sep 2011" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g-fips 7 Apr 2014" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0e 6 Sep 2011" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1g 7 Apr 2014" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/app/openssl/include/openssl/ossl_typ.h b/app/openssl/include/openssl/ossl_typ.h index 12bd7014..ea9227f6 100644 --- a/app/openssl/include/openssl/ossl_typ.h +++ b/app/openssl/include/openssl/ossl_typ.h @@ -91,10 +91,12 @@ typedef struct asn1_string_st ASN1_TIME; typedef struct asn1_string_st ASN1_GENERALIZEDTIME; typedef struct asn1_string_st ASN1_VISIBLESTRING; typedef struct asn1_string_st ASN1_UTF8STRING; +typedef struct asn1_string_st ASN1_STRING; typedef int ASN1_BOOLEAN; typedef int ASN1_NULL; #endif +typedef struct ASN1_ITEM_st ASN1_ITEM; typedef struct asn1_pctx_st ASN1_PCTX; #ifdef OPENSSL_SYS_WIN32 diff --git a/app/openssl/include/openssl/rand.h b/app/openssl/include/openssl/rand.h index ac6c0217..bb5520e8 100644 --- a/app/openssl/include/openssl/rand.h +++ b/app/openssl/include/openssl/rand.h @@ -119,6 +119,11 @@ int RAND_event(UINT, WPARAM, LPARAM); #endif +#ifdef OPENSSL_FIPS +void RAND_set_fips_drbg_type(int type, int flags); +int RAND_init_fips(void); +#endif + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -129,9 +134,14 @@ void ERR_load_RAND_strings(void); /* Function codes. */ #define RAND_F_RAND_GET_RAND_METHOD 101 +#define RAND_F_RAND_INIT_FIPS 102 #define RAND_F_SSLEAY_RAND_BYTES 100 /* Reason codes. */ +#define RAND_R_DUAL_EC_DRBG_DISABLED 104 +#define RAND_R_ERROR_INITIALISING_DRBG 102 +#define RAND_R_ERROR_INSTANTIATING_DRBG 103 +#define RAND_R_NO_FIPS_RANDOM_METHOD_SET 101 #define RAND_R_PRNG_NOT_SEEDED 100 #ifdef __cplusplus diff --git a/app/openssl/include/openssl/rc2.h b/app/openssl/include/openssl/rc2.h index 34c83623..e542ec94 100644 --- a/app/openssl/include/openssl/rc2.h +++ b/app/openssl/include/openssl/rc2.h @@ -79,7 +79,9 @@ typedef struct rc2_key_st RC2_INT data[64]; } RC2_KEY; - +#ifdef OPENSSL_FIPS +void private_RC2_set_key(RC2_KEY *key, int len, const unsigned char *data,int bits); +#endif void RC2_set_key(RC2_KEY *key, int len, const unsigned char *data,int bits); void RC2_ecb_encrypt(const unsigned char *in,unsigned char *out,RC2_KEY *key, int enc); diff --git a/app/openssl/include/openssl/rc4.h b/app/openssl/include/openssl/rc4.h index 29d1accc..88ceb46b 100644 --- a/app/openssl/include/openssl/rc4.h +++ b/app/openssl/include/openssl/rc4.h @@ -79,6 +79,7 @@ typedef struct rc4_key_st const char *RC4_options(void); void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); +void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data); void RC4(RC4_KEY *key, size_t len, const unsigned char *indata, unsigned char *outdata); diff --git a/app/openssl/include/openssl/ripemd.h b/app/openssl/include/openssl/ripemd.h index 5942eb61..189bd8c9 100644 --- a/app/openssl/include/openssl/ripemd.h +++ b/app/openssl/include/openssl/ripemd.h @@ -91,6 +91,9 @@ typedef struct RIPEMD160state_st unsigned int num; } RIPEMD160_CTX; +#ifdef OPENSSL_FIPS +int private_RIPEMD160_Init(RIPEMD160_CTX *c); +#endif int RIPEMD160_Init(RIPEMD160_CTX *c); int RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len); int RIPEMD160_Final(unsigned char *md, RIPEMD160_CTX *c); diff --git a/app/openssl/include/openssl/rsa.h b/app/openssl/include/openssl/rsa.h index cf743436..5f269e57 100644 --- a/app/openssl/include/openssl/rsa.h +++ b/app/openssl/include/openssl/rsa.h @@ -222,12 +222,22 @@ struct rsa_st EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, -1, EVP_PKEY_CTRL_RSA_PADDING, \ pad, NULL) +#define EVP_PKEY_CTX_get_rsa_padding(ctx, ppad) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, -1, \ + EVP_PKEY_CTRL_GET_RSA_PADDING, 0, ppad) + #define EVP_PKEY_CTX_set_rsa_pss_saltlen(ctx, len) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ (EVP_PKEY_OP_SIGN|EVP_PKEY_OP_VERIFY), \ EVP_PKEY_CTRL_RSA_PSS_SALTLEN, \ len, NULL) +#define EVP_PKEY_CTX_get_rsa_pss_saltlen(ctx, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ + (EVP_PKEY_OP_SIGN|EVP_PKEY_OP_VERIFY), \ + EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN, \ + 0, plen) + #define EVP_PKEY_CTX_set_rsa_keygen_bits(ctx, bits) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_KEYGEN, \ EVP_PKEY_CTRL_RSA_KEYGEN_BITS, bits, NULL) @@ -236,11 +246,24 @@ struct rsa_st EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_KEYGEN, \ EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP, 0, pubexp) +#define EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTRL_RSA_MGF1_MD, 0, (void *)md) + +#define EVP_PKEY_CTX_get_rsa_mgf1_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTRL_GET_RSA_MGF1_MD, 0, (void *)pmd) + #define EVP_PKEY_CTRL_RSA_PADDING (EVP_PKEY_ALG_CTRL + 1) #define EVP_PKEY_CTRL_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 2) #define EVP_PKEY_CTRL_RSA_KEYGEN_BITS (EVP_PKEY_ALG_CTRL + 3) #define EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP (EVP_PKEY_ALG_CTRL + 4) +#define EVP_PKEY_CTRL_RSA_MGF1_MD (EVP_PKEY_ALG_CTRL + 5) + +#define EVP_PKEY_CTRL_GET_RSA_PADDING (EVP_PKEY_ALG_CTRL + 6) +#define EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 7) +#define EVP_PKEY_CTRL_GET_RSA_MGF1_MD (EVP_PKEY_ALG_CTRL + 8) #define RSA_PKCS1_PADDING 1 #define RSA_SSLV23_PADDING 2 @@ -257,7 +280,7 @@ struct rsa_st RSA * RSA_new(void); RSA * RSA_new_method(ENGINE *engine); -int RSA_size(const RSA *); +int RSA_size(const RSA *rsa); /* Deprecated version */ #ifndef OPENSSL_NO_DEPRECATED @@ -300,6 +323,16 @@ const RSA_METHOD *RSA_null_method(void); DECLARE_ASN1_ENCODE_FUNCTIONS_const(RSA, RSAPublicKey) DECLARE_ASN1_ENCODE_FUNCTIONS_const(RSA, RSAPrivateKey) +typedef struct rsa_pss_params_st + { + X509_ALGOR *hashAlgorithm; + X509_ALGOR *maskGenAlgorithm; + ASN1_INTEGER *saltLength; + ASN1_INTEGER *trailerField; + } RSA_PSS_PARAMS; + +DECLARE_ASN1_FUNCTIONS(RSA_PSS_PARAMS) + #ifndef OPENSSL_NO_FP_API int RSA_print_fp(FILE *fp, const RSA *r,int offset); #endif @@ -380,6 +413,14 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM, const unsigned char *mHash, const EVP_MD *Hash, int sLen); +int RSA_verify_PKCS1_PSS_mgf1(RSA *rsa, const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, + const unsigned char *EM, int sLen); + +int RSA_padding_add_PKCS1_PSS_mgf1(RSA *rsa, unsigned char *EM, + const unsigned char *mHash, + const EVP_MD *Hash, const EVP_MD *mgf1Hash, int sLen); + int RSA_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, CRYPTO_EX_free *free_func); int RSA_set_ex_data(RSA *r,int idx,void *arg); @@ -388,6 +429,25 @@ void *RSA_get_ex_data(const RSA *r, int idx); RSA *RSAPublicKey_dup(RSA *rsa); RSA *RSAPrivateKey_dup(RSA *rsa); +/* If this flag is set the RSA method is FIPS compliant and can be used + * in FIPS mode. This is set in the validated module method. If an + * application sets this flag in its own methods it is its responsibility + * to ensure the result is compliant. + */ + +#define RSA_FLAG_FIPS_METHOD 0x0400 + +/* If this flag is set the operations normally disabled in FIPS mode are + * permitted it is then the applications responsibility to ensure that the + * usage is compliant. + */ + +#define RSA_FLAG_NON_FIPS_ALLOW 0x0400 +/* Application has decided PRNG is good enough to generate a key: don't + * check. + */ +#define RSA_FLAG_CHECKED 0x0800 + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -405,6 +465,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_PKEY_RSA_CTRL 143 #define RSA_F_PKEY_RSA_CTRL_STR 144 #define RSA_F_PKEY_RSA_SIGN 142 +#define RSA_F_PKEY_RSA_VERIFY 154 #define RSA_F_PKEY_RSA_VERIFYRECOVER 141 #define RSA_F_RSA_BUILTIN_KEYGEN 129 #define RSA_F_RSA_CHECK_KEY 123 @@ -413,6 +474,8 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_EAY_PUBLIC_DECRYPT 103 #define RSA_F_RSA_EAY_PUBLIC_ENCRYPT 104 #define RSA_F_RSA_GENERATE_KEY 105 +#define RSA_F_RSA_GENERATE_KEY_EX 155 +#define RSA_F_RSA_ITEM_VERIFY 156 #define RSA_F_RSA_MEMORY_LOCK 130 #define RSA_F_RSA_NEW_METHOD 106 #define RSA_F_RSA_NULL 124 @@ -424,6 +487,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_PADDING_ADD_NONE 107 #define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP 121 #define RSA_F_RSA_PADDING_ADD_PKCS1_PSS 125 +#define RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1 148 #define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_1 108 #define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_2 109 #define RSA_F_RSA_PADDING_ADD_SSLV23 110 @@ -436,8 +500,12 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_PADDING_CHECK_X931 128 #define RSA_F_RSA_PRINT 115 #define RSA_F_RSA_PRINT_FP 116 +#define RSA_F_RSA_PRIVATE_DECRYPT 150 +#define RSA_F_RSA_PRIVATE_ENCRYPT 151 #define RSA_F_RSA_PRIV_DECODE 137 #define RSA_F_RSA_PRIV_ENCODE 138 +#define RSA_F_RSA_PUBLIC_DECRYPT 152 +#define RSA_F_RSA_PUBLIC_ENCRYPT 153 #define RSA_F_RSA_PUB_DECODE 139 #define RSA_F_RSA_SETUP_BLINDING 136 #define RSA_F_RSA_SIGN 117 @@ -445,6 +513,7 @@ void ERR_load_RSA_strings(void); #define RSA_F_RSA_VERIFY 119 #define RSA_F_RSA_VERIFY_ASN1_OCTET_STRING 120 #define RSA_F_RSA_VERIFY_PKCS1_PSS 126 +#define RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1 149 /* Reason codes. */ #define RSA_R_ALGORITHM_MISMATCH 100 @@ -470,19 +539,24 @@ void ERR_load_RSA_strings(void); #define RSA_R_INVALID_HEADER 137 #define RSA_R_INVALID_KEYBITS 145 #define RSA_R_INVALID_MESSAGE_LENGTH 131 +#define RSA_R_INVALID_MGF1_MD 156 #define RSA_R_INVALID_PADDING 138 #define RSA_R_INVALID_PADDING_MODE 141 +#define RSA_R_INVALID_PSS_PARAMETERS 149 #define RSA_R_INVALID_PSS_SALTLEN 146 +#define RSA_R_INVALID_SALT_LENGTH 150 #define RSA_R_INVALID_TRAILER 139 #define RSA_R_INVALID_X931_DIGEST 142 #define RSA_R_IQMP_NOT_INVERSE_OF_Q 126 #define RSA_R_KEY_SIZE_TOO_SMALL 120 #define RSA_R_LAST_OCTET_INVALID 134 #define RSA_R_MODULUS_TOO_LARGE 105 +#define RSA_R_NON_FIPS_RSA_METHOD 157 #define RSA_R_NO_PUBLIC_EXPONENT 140 #define RSA_R_NULL_BEFORE_BLOCK_MISSING 113 #define RSA_R_N_DOES_NOT_EQUAL_P_Q 127 #define RSA_R_OAEP_DECODING_ERROR 121 +#define RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE 158 #define RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 148 #define RSA_R_PADDING_CHECK_FAILED 114 #define RSA_R_P_NOT_PRIME 128 @@ -493,7 +567,12 @@ void ERR_load_RSA_strings(void); #define RSA_R_SSLV3_ROLLBACK_ATTACK 115 #define RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD 116 #define RSA_R_UNKNOWN_ALGORITHM_TYPE 117 +#define RSA_R_UNKNOWN_MASK_DIGEST 151 #define RSA_R_UNKNOWN_PADDING_TYPE 118 +#define RSA_R_UNKNOWN_PSS_DIGEST 152 +#define RSA_R_UNSUPPORTED_MASK_ALGORITHM 153 +#define RSA_R_UNSUPPORTED_MASK_PARAMETER 154 +#define RSA_R_UNSUPPORTED_SIGNATURE_TYPE 155 #define RSA_R_VALUE_MISSING 147 #define RSA_R_WRONG_SIGNATURE_LENGTH 119 diff --git a/app/openssl/include/openssl/safestack.h b/app/openssl/include/openssl/safestack.h index 3e76aa58..ea3aa0d8 100644 --- a/app/openssl/include/openssl/safestack.h +++ b/app/openssl/include/openssl/safestack.h @@ -1459,6 +1459,94 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_POLICY_MAPPING_sort(st) SKM_sk_sort(POLICY_MAPPING, (st)) #define sk_POLICY_MAPPING_is_sorted(st) SKM_sk_is_sorted(POLICY_MAPPING, (st)) +#define sk_SRP_gN_new(cmp) SKM_sk_new(SRP_gN, (cmp)) +#define sk_SRP_gN_new_null() SKM_sk_new_null(SRP_gN) +#define sk_SRP_gN_free(st) SKM_sk_free(SRP_gN, (st)) +#define sk_SRP_gN_num(st) SKM_sk_num(SRP_gN, (st)) +#define sk_SRP_gN_value(st, i) SKM_sk_value(SRP_gN, (st), (i)) +#define sk_SRP_gN_set(st, i, val) SKM_sk_set(SRP_gN, (st), (i), (val)) +#define sk_SRP_gN_zero(st) SKM_sk_zero(SRP_gN, (st)) +#define sk_SRP_gN_push(st, val) SKM_sk_push(SRP_gN, (st), (val)) +#define sk_SRP_gN_unshift(st, val) SKM_sk_unshift(SRP_gN, (st), (val)) +#define sk_SRP_gN_find(st, val) SKM_sk_find(SRP_gN, (st), (val)) +#define sk_SRP_gN_find_ex(st, val) SKM_sk_find_ex(SRP_gN, (st), (val)) +#define sk_SRP_gN_delete(st, i) SKM_sk_delete(SRP_gN, (st), (i)) +#define sk_SRP_gN_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_gN, (st), (ptr)) +#define sk_SRP_gN_insert(st, val, i) SKM_sk_insert(SRP_gN, (st), (val), (i)) +#define sk_SRP_gN_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN, (st), (cmp)) +#define sk_SRP_gN_dup(st) SKM_sk_dup(SRP_gN, st) +#define sk_SRP_gN_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN, (st), (free_func)) +#define sk_SRP_gN_shift(st) SKM_sk_shift(SRP_gN, (st)) +#define sk_SRP_gN_pop(st) SKM_sk_pop(SRP_gN, (st)) +#define sk_SRP_gN_sort(st) SKM_sk_sort(SRP_gN, (st)) +#define sk_SRP_gN_is_sorted(st) SKM_sk_is_sorted(SRP_gN, (st)) + +#define sk_SRP_gN_cache_new(cmp) SKM_sk_new(SRP_gN_cache, (cmp)) +#define sk_SRP_gN_cache_new_null() SKM_sk_new_null(SRP_gN_cache) +#define sk_SRP_gN_cache_free(st) SKM_sk_free(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_num(st) SKM_sk_num(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_value(st, i) SKM_sk_value(SRP_gN_cache, (st), (i)) +#define sk_SRP_gN_cache_set(st, i, val) SKM_sk_set(SRP_gN_cache, (st), (i), (val)) +#define sk_SRP_gN_cache_zero(st) SKM_sk_zero(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_push(st, val) SKM_sk_push(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_unshift(st, val) SKM_sk_unshift(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_find(st, val) SKM_sk_find(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_find_ex(st, val) SKM_sk_find_ex(SRP_gN_cache, (st), (val)) +#define sk_SRP_gN_cache_delete(st, i) SKM_sk_delete(SRP_gN_cache, (st), (i)) +#define sk_SRP_gN_cache_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_gN_cache, (st), (ptr)) +#define sk_SRP_gN_cache_insert(st, val, i) SKM_sk_insert(SRP_gN_cache, (st), (val), (i)) +#define sk_SRP_gN_cache_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN_cache, (st), (cmp)) +#define sk_SRP_gN_cache_dup(st) SKM_sk_dup(SRP_gN_cache, st) +#define sk_SRP_gN_cache_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN_cache, (st), (free_func)) +#define sk_SRP_gN_cache_shift(st) SKM_sk_shift(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_pop(st) SKM_sk_pop(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_sort(st) SKM_sk_sort(SRP_gN_cache, (st)) +#define sk_SRP_gN_cache_is_sorted(st) SKM_sk_is_sorted(SRP_gN_cache, (st)) + +#define sk_SRP_user_pwd_new(cmp) SKM_sk_new(SRP_user_pwd, (cmp)) +#define sk_SRP_user_pwd_new_null() SKM_sk_new_null(SRP_user_pwd) +#define sk_SRP_user_pwd_free(st) SKM_sk_free(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_num(st) SKM_sk_num(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_value(st, i) SKM_sk_value(SRP_user_pwd, (st), (i)) +#define sk_SRP_user_pwd_set(st, i, val) SKM_sk_set(SRP_user_pwd, (st), (i), (val)) +#define sk_SRP_user_pwd_zero(st) SKM_sk_zero(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_push(st, val) SKM_sk_push(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_unshift(st, val) SKM_sk_unshift(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_find(st, val) SKM_sk_find(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_find_ex(st, val) SKM_sk_find_ex(SRP_user_pwd, (st), (val)) +#define sk_SRP_user_pwd_delete(st, i) SKM_sk_delete(SRP_user_pwd, (st), (i)) +#define sk_SRP_user_pwd_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRP_user_pwd, (st), (ptr)) +#define sk_SRP_user_pwd_insert(st, val, i) SKM_sk_insert(SRP_user_pwd, (st), (val), (i)) +#define sk_SRP_user_pwd_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_user_pwd, (st), (cmp)) +#define sk_SRP_user_pwd_dup(st) SKM_sk_dup(SRP_user_pwd, st) +#define sk_SRP_user_pwd_pop_free(st, free_func) SKM_sk_pop_free(SRP_user_pwd, (st), (free_func)) +#define sk_SRP_user_pwd_shift(st) SKM_sk_shift(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_pop(st) SKM_sk_pop(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_sort(st) SKM_sk_sort(SRP_user_pwd, (st)) +#define sk_SRP_user_pwd_is_sorted(st) SKM_sk_is_sorted(SRP_user_pwd, (st)) + +#define sk_SRTP_PROTECTION_PROFILE_new(cmp) SKM_sk_new(SRTP_PROTECTION_PROFILE, (cmp)) +#define sk_SRTP_PROTECTION_PROFILE_new_null() SKM_sk_new_null(SRTP_PROTECTION_PROFILE) +#define sk_SRTP_PROTECTION_PROFILE_free(st) SKM_sk_free(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_num(st) SKM_sk_num(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_value(st, i) SKM_sk_value(SRTP_PROTECTION_PROFILE, (st), (i)) +#define sk_SRTP_PROTECTION_PROFILE_set(st, i, val) SKM_sk_set(SRTP_PROTECTION_PROFILE, (st), (i), (val)) +#define sk_SRTP_PROTECTION_PROFILE_zero(st) SKM_sk_zero(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_push(st, val) SKM_sk_push(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_unshift(st, val) SKM_sk_unshift(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_find(st, val) SKM_sk_find(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_find_ex(st, val) SKM_sk_find_ex(SRTP_PROTECTION_PROFILE, (st), (val)) +#define sk_SRTP_PROTECTION_PROFILE_delete(st, i) SKM_sk_delete(SRTP_PROTECTION_PROFILE, (st), (i)) +#define sk_SRTP_PROTECTION_PROFILE_delete_ptr(st, ptr) SKM_sk_delete_ptr(SRTP_PROTECTION_PROFILE, (st), (ptr)) +#define sk_SRTP_PROTECTION_PROFILE_insert(st, val, i) SKM_sk_insert(SRTP_PROTECTION_PROFILE, (st), (val), (i)) +#define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRTP_PROTECTION_PROFILE, (st), (cmp)) +#define sk_SRTP_PROTECTION_PROFILE_dup(st) SKM_sk_dup(SRTP_PROTECTION_PROFILE, st) +#define sk_SRTP_PROTECTION_PROFILE_pop_free(st, free_func) SKM_sk_pop_free(SRTP_PROTECTION_PROFILE, (st), (free_func)) +#define sk_SRTP_PROTECTION_PROFILE_shift(st) SKM_sk_shift(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_pop(st) SKM_sk_pop(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_sort(st) SKM_sk_sort(SRTP_PROTECTION_PROFILE, (st)) +#define sk_SRTP_PROTECTION_PROFILE_is_sorted(st) SKM_sk_is_sorted(SRTP_PROTECTION_PROFILE, (st)) + #define sk_SSL_CIPHER_new(cmp) SKM_sk_new(SSL_CIPHER, (cmp)) #define sk_SSL_CIPHER_new_null() SKM_sk_new_null(SSL_CIPHER) #define sk_SSL_CIPHER_free(st) SKM_sk_free(SSL_CIPHER, (st)) @@ -2056,31 +2144,6 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_OPENSSL_STRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_STRING, (st)) -#define sk_OPENSSL_PSTRING_new(cmp) ((STACK_OF(OPENSSL_PSTRING) *)sk_new(CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) -#define sk_OPENSSL_PSTRING_new_null() ((STACK_OF(OPENSSL_PSTRING) *)sk_new_null()) -#define sk_OPENSSL_PSTRING_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i)) -#define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func)) -#define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i) -#define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_zero(st) SKM_sk_zero(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_unshift(st, val) sk_unshift(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_find_ex(st, val) sk_find_ex((_STACK *)CHECKED_CONST_PTR_OF(STACK_OF(OPENSSL_PSTRING), st), CHECKED_CONST_PTR_OF(OPENSSL_STRING, val)) -#define sk_OPENSSL_PSTRING_delete(st, i) SKM_sk_delete(OPENSSL_PSTRING, (st), (i)) -#define sk_OPENSSL_PSTRING_delete_ptr(st, ptr) (OPENSSL_PSTRING *)sk_delete_ptr(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, ptr)) -#define sk_OPENSSL_PSTRING_set_cmp_func(st, cmp) \ - ((int (*)(const OPENSSL_STRING * const *,const OPENSSL_STRING * const *)) \ - sk_set_cmp_func(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) -#define sk_OPENSSL_PSTRING_dup(st) SKM_sk_dup(OPENSSL_PSTRING, st) -#define sk_OPENSSL_PSTRING_shift(st) SKM_sk_shift(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_pop(st) (OPENSSL_STRING *)sk_pop(CHECKED_STACK_OF(OPENSSL_PSTRING, st)) -#define sk_OPENSSL_PSTRING_sort(st) SKM_sk_sort(OPENSSL_PSTRING, (st)) -#define sk_OPENSSL_PSTRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_PSTRING, (st)) - - #define sk_OPENSSL_BLOCK_new(cmp) ((STACK_OF(OPENSSL_BLOCK) *)sk_new(CHECKED_SK_CMP_FUNC(void, cmp))) #define sk_OPENSSL_BLOCK_new_null() ((STACK_OF(OPENSSL_BLOCK) *)sk_new_null()) #define sk_OPENSSL_BLOCK_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val)) @@ -2106,6 +2169,31 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) #define sk_OPENSSL_BLOCK_is_sorted(st) SKM_sk_is_sorted(OPENSSL_BLOCK, (st)) +#define sk_OPENSSL_PSTRING_new(cmp) ((STACK_OF(OPENSSL_PSTRING) *)sk_new(CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) +#define sk_OPENSSL_PSTRING_new_null() ((STACK_OF(OPENSSL_PSTRING) *)sk_new_null()) +#define sk_OPENSSL_PSTRING_push(st, val) sk_push(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i)) +#define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func)) +#define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i) +#define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_zero(st) SKM_sk_zero(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_unshift(st, val) sk_unshift(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_find_ex(st, val) sk_find_ex((_STACK *)CHECKED_CONST_PTR_OF(STACK_OF(OPENSSL_PSTRING), st), CHECKED_CONST_PTR_OF(OPENSSL_STRING, val)) +#define sk_OPENSSL_PSTRING_delete(st, i) SKM_sk_delete(OPENSSL_PSTRING, (st), (i)) +#define sk_OPENSSL_PSTRING_delete_ptr(st, ptr) (OPENSSL_PSTRING *)sk_delete_ptr(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, ptr)) +#define sk_OPENSSL_PSTRING_set_cmp_func(st, cmp) \ + ((int (*)(const OPENSSL_STRING * const *,const OPENSSL_STRING * const *)) \ + sk_set_cmp_func(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_CMP_FUNC(OPENSSL_STRING, cmp))) +#define sk_OPENSSL_PSTRING_dup(st) SKM_sk_dup(OPENSSL_PSTRING, st) +#define sk_OPENSSL_PSTRING_shift(st) SKM_sk_shift(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_pop(st) (OPENSSL_STRING *)sk_pop(CHECKED_STACK_OF(OPENSSL_PSTRING, st)) +#define sk_OPENSSL_PSTRING_sort(st) SKM_sk_sort(OPENSSL_PSTRING, (st)) +#define sk_OPENSSL_PSTRING_is_sorted(st) SKM_sk_is_sorted(OPENSSL_PSTRING, (st)) + + #define d2i_ASN1_SET_OF_ACCESS_DESCRIPTION(st, pp, length, d2i_func, free_func, ex_tag, ex_class) \ SKM_ASN1_SET_OF_d2i(ACCESS_DESCRIPTION, (st), (pp), (length), (d2i_func), (free_func), (ex_tag), (ex_class)) #define i2d_ASN1_SET_OF_ACCESS_DESCRIPTION(st, pp, i2d_func, ex_tag, ex_class, is_set) \ diff --git a/app/openssl/include/openssl/sha.h b/app/openssl/include/openssl/sha.h index 16cacf9f..8a6bf4bb 100644 --- a/app/openssl/include/openssl/sha.h +++ b/app/openssl/include/openssl/sha.h @@ -106,6 +106,9 @@ typedef struct SHAstate_st } SHA_CTX; #ifndef OPENSSL_NO_SHA0 +#ifdef OPENSSL_FIPS +int private_SHA_Init(SHA_CTX *c); +#endif int SHA_Init(SHA_CTX *c); int SHA_Update(SHA_CTX *c, const void *data, size_t len); int SHA_Final(unsigned char *md, SHA_CTX *c); @@ -113,6 +116,9 @@ unsigned char *SHA(const unsigned char *d, size_t n, unsigned char *md); void SHA_Transform(SHA_CTX *c, const unsigned char *data); #endif #ifndef OPENSSL_NO_SHA1 +#ifdef OPENSSL_FIPS +int private_SHA1_Init(SHA_CTX *c); +#endif int SHA1_Init(SHA_CTX *c); int SHA1_Update(SHA_CTX *c, const void *data, size_t len); int SHA1_Final(unsigned char *md, SHA_CTX *c); @@ -135,6 +141,10 @@ typedef struct SHA256state_st } SHA256_CTX; #ifndef OPENSSL_NO_SHA256 +#ifdef OPENSSL_FIPS +int private_SHA224_Init(SHA256_CTX *c); +int private_SHA256_Init(SHA256_CTX *c); +#endif int SHA224_Init(SHA256_CTX *c); int SHA224_Update(SHA256_CTX *c, const void *data, size_t len); int SHA224_Final(unsigned char *md, SHA256_CTX *c); @@ -182,6 +192,10 @@ typedef struct SHA512state_st #endif #ifndef OPENSSL_NO_SHA512 +#ifdef OPENSSL_FIPS +int private_SHA384_Init(SHA512_CTX *c); +int private_SHA512_Init(SHA512_CTX *c); +#endif int SHA384_Init(SHA512_CTX *c); int SHA384_Update(SHA512_CTX *c, const void *data, size_t len); int SHA384_Final(unsigned char *md, SHA512_CTX *c); diff --git a/app/openssl/include/openssl/srp.h b/app/openssl/include/openssl/srp.h new file mode 100644 index 00000000..7ec7825c --- /dev/null +++ b/app/openssl/include/openssl/srp.h @@ -0,0 +1,172 @@ +/* crypto/srp/srp.h */ +/* Written by Christophe Renou (christophe.renou@edelweb.fr) with + * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#ifndef __SRP_H__ +#define __SRP_H__ + +#ifndef OPENSSL_NO_SRP + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +typedef struct SRP_gN_cache_st + { + char *b64_bn; + BIGNUM *bn; + } SRP_gN_cache; + + +DECLARE_STACK_OF(SRP_gN_cache) + +typedef struct SRP_user_pwd_st + { + char *id; + BIGNUM *s; + BIGNUM *v; + const BIGNUM *g; + const BIGNUM *N; + char *info; + } SRP_user_pwd; + +DECLARE_STACK_OF(SRP_user_pwd) + +typedef struct SRP_VBASE_st + { + STACK_OF(SRP_user_pwd) *users_pwd; + STACK_OF(SRP_gN_cache) *gN_cache; +/* to simulate a user */ + char *seed_key; + BIGNUM *default_g; + BIGNUM *default_N; + } SRP_VBASE; + + +/*Structure interne pour retenir les couples N et g*/ +typedef struct SRP_gN_st + { + char *id; + BIGNUM *g; + BIGNUM *N; + } SRP_gN; + +DECLARE_STACK_OF(SRP_gN) + +SRP_VBASE *SRP_VBASE_new(char *seed_key); +int SRP_VBASE_free(SRP_VBASE *vb); +int SRP_VBASE_init(SRP_VBASE *vb, char * verifier_file); +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g); +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g); + + +#define SRP_NO_ERROR 0 +#define SRP_ERR_VBASE_INCOMPLETE_FILE 1 +#define SRP_ERR_VBASE_BN_LIB 2 +#define SRP_ERR_OPEN_FILE 3 +#define SRP_ERR_MEMORY 4 + +#define DB_srptype 0 +#define DB_srpverifier 1 +#define DB_srpsalt 2 +#define DB_srpid 3 +#define DB_srpgN 4 +#define DB_srpinfo 5 +#undef DB_NUMBER +#define DB_NUMBER 6 + +#define DB_SRP_INDEX 'I' +#define DB_SRP_VALID 'V' +#define DB_SRP_REVOKED 'R' +#define DB_SRP_MODIF 'v' + + +/* see srp.c */ +char * SRP_check_known_gN_param(BIGNUM* g, BIGNUM* N); +SRP_gN *SRP_get_default_gN(const char * id) ; + +/* server side .... */ +BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N); +BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v); +int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N); +BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N) ; + + + +/* client side .... */ +BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass); +BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g); +BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u); +int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N); + +#define SRP_MINIMAL_N 1024 + +#ifdef __cplusplus +} +#endif + +#endif +#endif diff --git a/app/openssl/include/openssl/srtp.h b/app/openssl/include/openssl/srtp.h new file mode 100644 index 00000000..c0cf33ef --- /dev/null +++ b/app/openssl/include/openssl/srtp.h @@ -0,0 +1,145 @@ +/* ssl/tls1.h */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ +/* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* + DTLS code by Eric Rescorla + + Copyright (C) 2006, Network Resonance, Inc. + Copyright (C) 2011, RTFM, Inc. +*/ + +#ifndef HEADER_D1_SRTP_H +#define HEADER_D1_SRTP_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#define SRTP_AES128_CM_SHA1_80 0x0001 +#define SRTP_AES128_CM_SHA1_32 0x0002 +#define SRTP_AES128_F8_SHA1_80 0x0003 +#define SRTP_AES128_F8_SHA1_32 0x0004 +#define SRTP_NULL_SHA1_80 0x0005 +#define SRTP_NULL_SHA1_32 0x0006 + +int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles); +int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles); +SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); + +STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl); +SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/app/openssl/include/openssl/ssl.h b/app/openssl/include/openssl/ssl.h index 9cb2bf93..54b0eb6c 100644 --- a/app/openssl/include/openssl/ssl.h +++ b/app/openssl/include/openssl/ssl.h @@ -252,6 +252,7 @@ extern "C" { #define SSL_TXT_kEECDH "kEECDH" #define SSL_TXT_kPSK "kPSK" #define SSL_TXT_kGOST "kGOST" +#define SSL_TXT_kSRP "kSRP" #define SSL_TXT_aRSA "aRSA" #define SSL_TXT_aDSS "aDSS" @@ -275,6 +276,7 @@ extern "C" { #define SSL_TXT_ECDSA "ECDSA" #define SSL_TXT_KRB5 "KRB5" #define SSL_TXT_PSK "PSK" +#define SSL_TXT_SRP "SRP" #define SSL_TXT_DES "DES" #define SSL_TXT_3DES "3DES" @@ -285,6 +287,7 @@ extern "C" { #define SSL_TXT_AES128 "AES128" #define SSL_TXT_AES256 "AES256" #define SSL_TXT_AES "AES" +#define SSL_TXT_AES_GCM "AESGCM" #define SSL_TXT_CAMELLIA128 "CAMELLIA128" #define SSL_TXT_CAMELLIA256 "CAMELLIA256" #define SSL_TXT_CAMELLIA "CAMELLIA" @@ -294,10 +297,14 @@ extern "C" { #define SSL_TXT_SHA "SHA" /* same as "SHA1" */ #define SSL_TXT_GOST94 "GOST94" #define SSL_TXT_GOST89MAC "GOST89MAC" +#define SSL_TXT_SHA256 "SHA256" +#define SSL_TXT_SHA384 "SHA384" #define SSL_TXT_SSLV2 "SSLv2" #define SSL_TXT_SSLV3 "SSLv3" #define SSL_TXT_TLSV1 "TLSv1" +#define SSL_TXT_TLSV1_1 "TLSv1.1" +#define SSL_TXT_TLSV1_2 "TLSv1.2" #define SSL_TXT_EXP "EXP" #define SSL_TXT_EXPORT "EXPORT" @@ -356,9 +363,29 @@ extern "C" { * in SSL_CTX. */ typedef struct ssl_st *ssl_crock_st; typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; +typedef struct ssl_method_st SSL_METHOD; +typedef struct ssl_cipher_st SSL_CIPHER; +typedef struct ssl_session_st SSL_SESSION; + +DECLARE_STACK_OF(SSL_CIPHER) + +/* SRTP protection profiles for use with the use_srtp extension (RFC 5764)*/ +typedef struct srtp_protection_profile_st + { + const char *name; + unsigned long id; + } SRTP_PROTECTION_PROFILE; + +DECLARE_STACK_OF(SRTP_PROTECTION_PROFILE) + +typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, int len, void *arg); +typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, STACK_OF(SSL_CIPHER) *peer_ciphers, SSL_CIPHER **cipher, void *arg); + + +#ifndef OPENSSL_NO_SSL_INTERN /* used to hold info on the particular ciphers used */ -typedef struct ssl_cipher_st +struct ssl_cipher_st { int valid; const char *name; /* text name */ @@ -375,15 +402,11 @@ typedef struct ssl_cipher_st unsigned long algorithm2; /* Extra flags */ int strength_bits; /* Number of bits really used */ int alg_bits; /* Number of bits for algorithm */ - } SSL_CIPHER; - -DECLARE_STACK_OF(SSL_CIPHER) + }; -typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, int len, void *arg); -typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, STACK_OF(SSL_CIPHER) *peer_ciphers, SSL_CIPHER **cipher, void *arg); /* Used to hold functions for SSLv2 or SSLv3/TLSv1 functions */ -typedef struct ssl_method_st +struct ssl_method_st { int version; int (*ssl_new)(SSL *s); @@ -416,7 +439,7 @@ typedef struct ssl_method_st int (*ssl_version)(void); long (*ssl_callback_ctrl)(SSL *s, int cb_id, void (*fp)(void)); long (*ssl_ctx_callback_ctrl)(SSL_CTX *s, int cb_id, void (*fp)(void)); - } SSL_METHOD; + }; /* Lets make this into an ASN.1 type structure as follows * SSL_SESSION_ID ::= SEQUENCE { @@ -433,14 +456,17 @@ typedef struct ssl_method_st * Session_ID_context [ 4 ] EXPLICIT OCTET STRING, -- the Session ID context * Verify_result [ 5 ] EXPLICIT INTEGER, -- X509_V_... code for `Peer' * HostName [ 6 ] EXPLICIT OCTET STRING, -- optional HostName from servername TLS extension - * ECPointFormatList [ 7 ] OCTET STRING, -- optional EC point format list from TLS extension - * PSK_identity_hint [ 8 ] EXPLICIT OCTET STRING, -- optional PSK identity hint - * PSK_identity [ 9 ] EXPLICIT OCTET STRING -- optional PSK identity + * PSK_identity_hint [ 7 ] EXPLICIT OCTET STRING, -- optional PSK identity hint + * PSK_identity [ 8 ] EXPLICIT OCTET STRING, -- optional PSK identity + * Ticket_lifetime_hint [9] EXPLICIT INTEGER, -- server's lifetime hint for session ticket + * Ticket [10] EXPLICIT OCTET STRING, -- session ticket (clients only) + * Compression_meth [11] EXPLICIT OCTET STRING, -- optional compression method + * SRP_username [ 12 ] EXPLICIT OCTET STRING -- optional SRP username * } * Look in ssl/ssl_asn1.c for more details * I'm using EXPLICIT tags so I can read the damn things using asn1parse :-). */ -typedef struct ssl_session_st +struct ssl_session_st { int ssl_version; /* what ssl version session info is * being kept in here? */ @@ -467,6 +493,9 @@ typedef struct ssl_session_st char *psk_identity_hint; char *psk_identity; #endif + /* Used to indicate that session resumption is not allowed. + * Applications can also set this bit for a new session via + * not_resumable_session_cb to disable session caching and tickets. */ int not_resumable; /* The cert is the certificate used to establish this connection */ @@ -509,11 +538,15 @@ typedef struct ssl_session_st #endif /* OPENSSL_NO_EC */ /* RFC4507 info */ unsigned char *tlsext_tick; /* Session ticket */ - size_t tlsext_ticklen; /* Session ticket length */ + size_t tlsext_ticklen; /* Session ticket length */ long tlsext_tick_lifetime_hint; /* Session lifetime hint in seconds */ #endif - } SSL_SESSION; +#ifndef OPENSSL_NO_SRP + char *srp_username; +#endif + }; +#endif #define SSL_OP_MICROSOFT_SESS_ID_BUG 0x00000001L #define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x00000002L @@ -522,21 +555,28 @@ typedef struct ssl_session_st #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L #define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L -#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x00000040L /* no effect since 0.9.7h and 0.9.8b */ +#define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L #define SSL_OP_TLS_D5_BUG 0x00000100L #define SSL_OP_TLS_BLOCK_PADDING_BUG 0x00000200L -/* Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added - * in OpenSSL 0.9.6d. Usually (depending on the application protocol) - * the workaround is not needed. Unfortunately some broken SSL/TLS - * implementations cannot handle it at all, which is why we include - * it in SSL_OP_ALL. */ +/* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ +#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 + +/* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the + * insertion of empty records in CBC mode, but the empty records were commonly + * misinterpreted as EOF by other TLS stacks and so this was disabled by + * SSL_OP_ALL. + * + * This has been replaced by 1/n-1 record splitting, which is enabled by + * SSL_MODE_CBC_RECORD_SPLITTING in SSL_set_mode. This involves sending a + * one-byte record rather than an empty record and has much better + * compatibility. */ #define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS 0x00000800L /* added in 0.9.6e */ /* SSL_OP_ALL: various bug workarounds that should be rather harmless. * This used to be 0x000FFFFFL before 0.9.7. */ -#define SSL_OP_ALL 0x80000FFFL +#define SSL_OP_ALL 0x80000BFFL /* DTLS options */ #define SSL_OP_NO_QUERY_MTU 0x00001000L @@ -572,11 +612,17 @@ typedef struct ssl_session_st #define SSL_OP_NO_SSLv2 0x01000000L #define SSL_OP_NO_SSLv3 0x02000000L #define SSL_OP_NO_TLSv1 0x04000000L +#define SSL_OP_NO_TLSv1_2 0x08000000L +#define SSL_OP_NO_TLSv1_1 0x10000000L +/* These next two were never actually used for anything since SSLeay + * zap so we have some more flags. + */ /* The next flag deliberately changes the ciphertest, this is a check * for the PKCS#1 attack */ -#define SSL_OP_PKCS1_CHECK_1 0x08000000L -#define SSL_OP_PKCS1_CHECK_2 0x10000000L +#define SSL_OP_PKCS1_CHECK_1 0x0 +#define SSL_OP_PKCS1_CHECK_2 0x0 + #define SSL_OP_NETSCAPE_CA_DN_BUG 0x20000000L #define SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG 0x40000000L /* Make server add server-hello extension from early version of @@ -602,13 +648,21 @@ typedef struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L -/* Use small read and write buffers: (a) lazy allocate read buffers for - * large incoming records, and (b) limit the size of outgoing records. */ -#define SSL_MODE_SMALL_BUFFERS 0x00000020L +/* Send the current time in the Random fields of the ClientHello and + * ServerHello records for compatibility with hypothetical implementations + * that require it. + */ +#define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L +#define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ -#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000040L +#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L +/* When set, TLS 1.0 and SSLv3, multi-byte, CBC records will be split in two: + * the first record will contain a single byte and the second will contain the + * rest of the bytes. This effectively randomises the IV and prevents BEAST + * attacks. */ +#define SSL_MODE_CBC_RECORD_SPLITTING 0x00000100L /* Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, * they cannot be used to clear bits. */ @@ -644,12 +698,53 @@ typedef struct ssl_session_st #define SSL_get_secure_renegotiation_support(ssl) \ SSL_ctrl((ssl), SSL_CTRL_GET_RI_SUPPORT, 0, NULL) +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_heartbeat(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_TLS_EXT_SEND_HEARTBEAT,0,NULL) +#endif + void SSL_CTX_set_msg_callback(SSL_CTX *ctx, void (*cb)(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)); void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)); #define SSL_CTX_set_msg_callback_arg(ctx, arg) SSL_CTX_ctrl((ctx), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) #define SSL_set_msg_callback_arg(ssl, arg) SSL_ctrl((ssl), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) +#ifndef OPENSSL_NO_SRP + +#ifndef OPENSSL_NO_SSL_INTERN + +typedef struct srp_ctx_st + { + /* param for all the callbacks */ + void *SRP_cb_arg; + /* set client Hello login callback */ + int (*TLS_ext_srp_username_callback)(SSL *, int *, void *); + /* set SRP N/g param callback for verification */ + int (*SRP_verify_param_callback)(SSL *, void *); + /* set SRP client passwd callback */ + char *(*SRP_give_srp_client_pwd_callback)(SSL *, void *); + + char *login; + BIGNUM *N,*g,*s,*B,*A; + BIGNUM *a,*b,*v; + char *info; + int strength; + + unsigned long srp_Mask; + } SRP_CTX; + +#endif + +/* see tls_srp.c */ +int SSL_SRP_CTX_init(SSL *s); +int SSL_CTX_SRP_CTX_init(SSL_CTX *ctx); +int SSL_SRP_CTX_free(SSL *ctx); +int SSL_CTX_SRP_CTX_free(SSL_CTX *ctx); +int SSL_srp_server_param_with_username(SSL *s, int *ad); +int SRP_generate_server_master_secret(SSL *s,unsigned char *master_key); +int SRP_Calc_A_param(SSL *s); +int SRP_generate_client_master_secret(SSL *s,unsigned char *master_key); +#endif #if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WIN32) #define SSL_MAX_CERT_LIST_DEFAULT 1024*30 /* 30k max cert list :-) */ @@ -675,7 +770,11 @@ void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int con typedef int (*GEN_SESSION_CB)(const SSL *ssl, unsigned char *id, unsigned int *id_len); -typedef struct ssl_comp_st +typedef struct ssl_comp_st SSL_COMP; + +#ifndef OPENSSL_NO_SSL_INTERN + +struct ssl_comp_st { int id; const char *name; @@ -684,7 +783,7 @@ typedef struct ssl_comp_st #else char *method; #endif - } SSL_COMP; + }; DECLARE_STACK_OF(SSL_COMP) DECLARE_LHASH_OF(SSL_SESSION); @@ -829,7 +928,7 @@ struct ssl_ctx_st */ unsigned int max_send_fragment; -#ifndef OPENSSL_ENGINE +#ifndef OPENSSL_NO_ENGINE /* Engine to pass requests for client certs to */ ENGINE *client_cert_engine; @@ -857,6 +956,28 @@ struct ssl_ctx_st /* draft-rescorla-tls-opaque-prf-input-00.txt information */ int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg); void *tlsext_opaque_prf_input_callback_arg; +#endif + +#ifndef OPENSSL_NO_PSK + char *psk_identity_hint; + unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, + unsigned int max_identity_len, unsigned char *psk, + unsigned int max_psk_len); + unsigned int (*psk_server_callback)(SSL *ssl, const char *identity, + unsigned char *psk, unsigned int max_psk_len); +#endif + +#ifndef OPENSSL_NO_BUF_FREELISTS +#define SSL_MAX_BUF_FREELIST_LEN_DEFAULT 32 + unsigned int freelist_max_len; + struct ssl3_buf_freelist_st *wbuf_freelist; + struct ssl3_buf_freelist_st *rbuf_freelist; +#endif +#ifndef OPENSSL_NO_SRP + SRP_CTX srp_ctx; /* ctx for SRP authentication */ +#endif + +#ifndef OPENSSL_NO_TLSEXT # ifndef OPENSSL_NO_NEXTPROTONEG /* Next protocol negotiation information */ @@ -876,24 +997,43 @@ struct ssl_ctx_st void *arg); void *next_proto_select_cb_arg; # endif -#endif -#ifndef OPENSSL_NO_PSK - char *psk_identity_hint; - unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, - unsigned int max_identity_len, unsigned char *psk, - unsigned int max_psk_len); - unsigned int (*psk_server_callback)(SSL *ssl, const char *identity, - unsigned char *psk, unsigned int max_psk_len); + /* ALPN information + * (we are in the process of transitioning from NPN to ALPN.) */ + + /* For a server, this contains a callback function that allows the + * server to select the protocol for the connection. + * out: on successful return, this must point to the raw protocol + * name (without the length prefix). + * outlen: on successful return, this contains the length of |*out|. + * in: points to the client's list of supported protocols in + * wire-format. + * inlen: the length of |in|. */ + int (*alpn_select_cb)(SSL *s, + const unsigned char **out, + unsigned char *outlen, + const unsigned char* in, + unsigned int inlen, + void *arg); + void *alpn_select_cb_arg; + + /* For a client, this contains the list of supported protocols in wire + * format. */ + unsigned char* alpn_client_proto_list; + unsigned alpn_client_proto_list_len; + + /* SRTP profiles we are willing to do from RFC 5764 */ + STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles; + + /* If true, a client will advertise the Channel ID extension and a + * server will echo it. */ + char tlsext_channel_id_enabled; + /* The client's Channel ID private key. */ + EVP_PKEY *tlsext_channel_id_private; #endif + }; -#ifndef OPENSSL_NO_BUF_FREELISTS -#define SSL_MAX_BUF_FREELIST_LEN_DEFAULT 32 - unsigned int freelist_max_len; - struct ssl3_buf_freelist_st *wbuf_freelist; - struct ssl3_buf_freelist_st *rbuf_freelist; #endif - }; #define SSL_SESS_CACHE_OFF 0x0000 #define SSL_SESS_CACHE_CLIENT 0x0001 @@ -931,6 +1071,10 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx); SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL) #define SSL_CTX_sess_cache_full(ctx) \ SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL) +/* SSL_CTX_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_CTX_enable_tls_channel_id(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, int (*new_session_cb)(struct ssl_st *ssl,SSL_SESSION *sess)); int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx))(struct ssl_st *ssl, SSL_SESSION *sess); @@ -952,26 +1096,43 @@ void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s, int (*cb) (SSL *ssl, const unsigned char **out, unsigned int *outlen, - void *arg), void *arg); + void *arg), + void *arg); void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, - int (*cb) (SSL *ssl, unsigned char **out, + int (*cb) (SSL *ssl, + unsigned char **out, unsigned char *outlen, const unsigned char *in, - unsigned int inlen, void *arg), + unsigned int inlen, + void *arg), void *arg); int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, const unsigned char *client, unsigned int client_len); -void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, - unsigned *len); +void SSL_get0_next_proto_negotiated(const SSL *s, + const unsigned char **data, unsigned *len); #define OPENSSL_NPN_UNSUPPORTED 0 #define OPENSSL_NPN_NEGOTIATED 1 #define OPENSSL_NPN_NO_OVERLAP 2 - #endif +int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char* protos, + unsigned protos_len); +int SSL_set_alpn_protos(SSL *ssl, const unsigned char* protos, + unsigned protos_len); +void SSL_CTX_set_alpn_select_cb(SSL_CTX* ctx, + int (*cb) (SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg), + void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned *len); + #ifndef OPENSSL_NO_PSK /* the maximum length of the buffer given to callbacks containing the * resulting identity/psk */ @@ -1011,6 +1172,8 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 +#ifndef OPENSSL_NO_SSL_INTERN + struct ssl_st { /* protocol version @@ -1055,9 +1218,7 @@ struct ssl_st int server; /* are we the server side? - mostly used by SSL_clear*/ - int new_session;/* 1 if we are to use a new session. - * 2 if we are a server and are inside a handshake - * (i.e. not just sending a HelloRequest) + int new_session;/* Generate a new session or reuse an old one. * NB: For servers, the 'new' session may actually be a previously * cached session or even the previous session unless * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is set */ @@ -1154,6 +1315,10 @@ struct ssl_st #endif /* OPENSSL_NO_KRB5 */ #ifndef OPENSSL_NO_PSK + /* PSK identity hint is stored here only to enable setting a hint on an SSL object before an + * SSL_SESSION is associated with it. Once an SSL_SESSION is associated with this SSL object, + * the psk_identity_hint from the session takes precedence over this one. */ + char *psk_identity_hint; unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, unsigned int max_identity_len, unsigned char *psk, unsigned int max_psk_len); @@ -1244,11 +1409,44 @@ struct ssl_st #endif #define session_ctx initial_ctx + + STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles; /* What we'll do */ + SRTP_PROTECTION_PROFILE *srtp_profile; /* What's been chosen */ + + unsigned int tlsext_heartbeat; /* Is use of the Heartbeat extension negotiated? + 0: disabled + 1: enabled + 2: enabled, but not allowed to send Requests + */ + unsigned int tlsext_hb_pending; /* Indicates if a HeartbeatRequest is in flight */ + unsigned int tlsext_hb_seq; /* HeartbeatRequest sequence number */ + + /* Copied from the SSL_CTX. For a server, means that we'll accept + * Channel IDs from clients. For a client, means that we'll advertise + * support. */ + char tlsext_channel_id_enabled; + /* The client's Channel ID private key. */ + EVP_PKEY *tlsext_channel_id_private; + + /* For a client, this contains the list of supported protocols in wire + * format. */ + unsigned char* alpn_client_proto_list; + unsigned alpn_client_proto_list_len; #else #define session_ctx ctx #endif /* OPENSSL_NO_TLSEXT */ + + int renegotiate;/* 1 if we are renegotiating. + * 2 if we are a server and are inside a handshake + * (i.e. not just sending a HelloRequest) */ + +#ifndef OPENSSL_NO_SRP + SRP_CTX srp_ctx; /* ctx for SRP authentication */ +#endif }; +#endif + #ifdef __cplusplus } #endif @@ -1258,6 +1456,7 @@ struct ssl_st #include /* This is mostly sslv3 with a few tweaks */ #include /* Datagram TLS */ #include +#include /* Support for the use_srtp extension */ #ifdef __cplusplus extern "C" { @@ -1304,7 +1503,7 @@ extern "C" { #define SSL_get_state(a) SSL_state(a) #define SSL_is_init_finished(a) (SSL_state(a) == SSL_ST_OK) #define SSL_in_init(a) ((SSL_state(a)&SSL_ST_INIT) && \ - !SSL_cutthrough_complete(a)) + !SSL_cutthrough_complete(a)) #define SSL_in_before(a) (SSL_state(a)&SSL_ST_BEFORE) #define SSL_in_connect_init(a) (SSL_state(a)&SSL_ST_CONNECT) #define SSL_in_accept_init(a) (SSL_state(a)&SSL_ST_ACCEPT) @@ -1476,6 +1675,23 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP 71 #define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72 + +#define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB 75 +#define SSL_CTRL_SET_SRP_VERIFY_PARAM_CB 76 +#define SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB 77 + +#define SSL_CTRL_SET_SRP_ARG 78 +#define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME 79 +#define SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH 80 +#define SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD 81 +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_CTRL_TLS_EXT_SEND_HEARTBEAT 85 +#define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING 86 +#define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS 87 +#endif +#define SSL_CTRL_CHANNEL_ID 88 +#define SSL_CTRL_GET_CHANNEL_ID 89 +#define SSL_CTRL_SET_CHANNEL_ID 90 #endif #define DTLS_CTRL_GET_TIMEOUT 73 @@ -1486,6 +1702,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_CTRL_CLEAR_OPTIONS 77 #define SSL_CTRL_CLEAR_MODE 78 +#define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 +#define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 + #define DTLSv1_get_timeout(ssl, arg) \ SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg) #define DTLSv1_handle_timeout(ssl) \ @@ -1520,8 +1739,31 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ + SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to + * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on + * success. */ +#define SSL_set1_tls_channel_id(s, private_key) \ + SSL_ctrl(s,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key) +#define SSL_CTX_set1_tls_channel_id(ctx, private_key) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key) +/* SSL_get_tls_channel_id gets the client's TLS Channel ID from a server SSL* + * and copies up to the first |channel_id_len| bytes into |channel_id|. The + * Channel ID consists of the client's P-256 public key as an (x,y) pair where + * each is a 32-byte, big-endian field element. Returns 0 if the client didn't + * offer a Channel ID and the length of the complete Channel ID otherwise. */ +#define SSL_get_tls_channel_id(ctx, channel_id, channel_id_len) \ + SSL_ctrl(ctx,SSL_CTRL_GET_CHANNEL_ID,channel_id_len,(void*)channel_id) + #define SSL_CTX_add_extra_chain_cert(ctx,x509) \ SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509) +#define SSL_CTX_get_extra_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +#define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) #ifndef OPENSSL_NO_BIO BIO_METHOD *BIO_f_ssl(void); @@ -1549,7 +1791,8 @@ const SSL_CIPHER *SSL_get_current_cipher(const SSL *s); int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); -const char * SSL_CIPHER_authentication_method(const SSL_CIPHER *c); +unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -1619,11 +1862,15 @@ long SSL_SESSION_set_time(SSL_SESSION *s, long t); long SSL_SESSION_get_timeout(const SSL_SESSION *s); long SSL_SESSION_set_timeout(SSL_SESSION *s, long t); void SSL_copy_session_id(SSL *to,const SSL *from); +X509 *SSL_SESSION_get0_peer(SSL_SESSION *s); +int SSL_SESSION_set1_id_context(SSL_SESSION *s,const unsigned char *sid_ctx, + unsigned int sid_ctx_len); SSL_SESSION *SSL_SESSION_new(void); const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, unsigned int *len); const char * SSL_SESSION_get_version(const SSL_SESSION *s); +unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s); #ifndef OPENSSL_NO_FP_API int SSL_SESSION_print_fp(FILE *fp,const SSL_SESSION *ses); #endif @@ -1687,6 +1934,30 @@ int SSL_set_trust(SSL *s, int trust); int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); +#ifndef OPENSSL_NO_SRP +int SSL_CTX_set_srp_username(SSL_CTX *ctx,char *name); +int SSL_CTX_set_srp_password(SSL_CTX *ctx,char *password); +int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength); +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, + char *(*cb)(SSL *,void *)); +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, + int (*cb)(SSL *,void *)); +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb)(SSL *,int *,void *)); +int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg); + +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char *info); +int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, + const char *grp); + +BIGNUM *SSL_get_srp_g(SSL *s); +BIGNUM *SSL_get_srp_N(SSL *s); + +char *SSL_get_srp_username(SSL *s); +char *SSL_get_srp_userinfo(SSL *s); +#endif + void SSL_free(SSL *ssl); int SSL_accept(SSL *ssl); int SSL_connect(SSL *ssl); @@ -1722,6 +1993,15 @@ const SSL_METHOD *TLSv1_method(void); /* TLSv1.0 */ const SSL_METHOD *TLSv1_server_method(void); /* TLSv1.0 */ const SSL_METHOD *TLSv1_client_method(void); /* TLSv1.0 */ +const SSL_METHOD *TLSv1_1_method(void); /* TLSv1.1 */ +const SSL_METHOD *TLSv1_1_server_method(void); /* TLSv1.1 */ +const SSL_METHOD *TLSv1_1_client_method(void); /* TLSv1.1 */ + +const SSL_METHOD *TLSv1_2_method(void); /* TLSv1.2 */ +const SSL_METHOD *TLSv1_2_server_method(void); /* TLSv1.2 */ +const SSL_METHOD *TLSv1_2_client_method(void); /* TLSv1.2 */ + + const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_server_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_client_method(void); /* DTLSv1.0 */ @@ -1730,6 +2010,7 @@ STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s); int SSL_do_handshake(SSL *s); int SSL_renegotiate(SSL *s); +int SSL_renegotiate_abbreviated(SSL *s); int SSL_renegotiate_pending(SSL *s); int SSL_shutdown(SSL *s); @@ -1781,6 +2062,7 @@ void SSL_set_info_callback(SSL *ssl, void (*cb)(const SSL *ssl,int type,int val)); void (*SSL_get_info_callback(const SSL *ssl))(const SSL *ssl,int type,int val); int SSL_state(const SSL *ssl); +void SSL_set_state(SSL *ssl, int state); void SSL_set_verify_result(SSL *ssl,long v); long SSL_get_verify_result(const SSL *ssl); @@ -1881,6 +2163,9 @@ int SSL_set_session_ticket_ext_cb(SSL *s, tls_session_ticket_ext_cb_fn cb, /* Pre-shared secret session resumption functions */ int SSL_set_session_secret_cb(SSL *s, tls_session_secret_cb_fn tls_session_secret_cb, void *arg); +void SSL_set_debug(SSL *s, int debug); +int SSL_cache_hit(SSL *s); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -1900,6 +2185,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_DTLS1_ACCEPT 246 #define SSL_F_DTLS1_ADD_CERT_TO_BUF 295 #define SSL_F_DTLS1_BUFFER_RECORD 247 +#define SSL_F_DTLS1_CHECK_TIMEOUT_NUM 316 #define SSL_F_DTLS1_CLIENT_HELLO 248 #define SSL_F_DTLS1_CONNECT 249 #define SSL_F_DTLS1_ENC 250 @@ -1908,6 +2194,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_DTLS1_GET_MESSAGE_FRAGMENT 253 #define SSL_F_DTLS1_GET_RECORD 254 #define SSL_F_DTLS1_HANDLE_TIMEOUT 297 +#define SSL_F_DTLS1_HEARTBEAT 305 #define SSL_F_DTLS1_OUTPUT_CERT_CHAIN 255 #define SSL_F_DTLS1_PREPROCESS_FRAGMENT 288 #define SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE 256 @@ -1957,6 +2244,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_CALLBACK_CTRL 233 #define SSL_F_SSL3_CHANGE_CIPHER_STATE 129 #define SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM 130 +#define SSL_F_SSL3_CHECK_CLIENT_HELLO 304 #define SSL_F_SSL3_CLIENT_HELLO 131 #define SSL_F_SSL3_CONNECT 132 #define SSL_F_SSL3_CTRL 213 @@ -1968,6 +2256,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_GET_CERTIFICATE_REQUEST 135 #define SSL_F_SSL3_GET_CERT_STATUS 289 #define SSL_F_SSL3_GET_CERT_VERIFY 136 +#define SSL_F_SSL3_GET_CHANNEL_ID 317 #define SSL_F_SSL3_GET_CLIENT_CERTIFICATE 137 #define SSL_F_SSL3_GET_CLIENT_HELLO 138 #define SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE 139 @@ -1975,7 +2264,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_GET_KEY_EXCHANGE 141 #define SSL_F_SSL3_GET_MESSAGE 142 #define SSL_F_SSL3_GET_NEW_SESSION_TICKET 283 -#define SSL_F_SSL3_GET_NEXT_PROTO 304 +#define SSL_F_SSL3_GET_NEXT_PROTO 306 #define SSL_F_SSL3_GET_RECORD 143 #define SSL_F_SSL3_GET_SERVER_CERTIFICATE 144 #define SSL_F_SSL3_GET_SERVER_DONE 145 @@ -1987,6 +2276,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_READ_BYTES 148 #define SSL_F_SSL3_READ_N 149 #define SSL_F_SSL3_SEND_CERTIFICATE_REQUEST 150 +#define SSL_F_SSL3_SEND_CHANNEL_ID 318 #define SSL_F_SSL3_SEND_CLIENT_CERTIFICATE 151 #define SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE 152 #define SSL_F_SSL3_SEND_CLIENT_VERIFY 153 @@ -2000,10 +2290,12 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_WRITE_PENDING 159 #define SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT 298 #define SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT 277 +#define SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT 307 #define SSL_F_SSL_ADD_DIR_CERT_SUBJECTS_TO_STACK 215 #define SSL_F_SSL_ADD_FILE_CERT_SUBJECTS_TO_STACK 216 #define SSL_F_SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT 299 #define SSL_F_SSL_ADD_SERVERHELLO_TLSEXT 278 +#define SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT 308 #define SSL_F_SSL_BAD_METHOD 160 #define SSL_F_SSL_BYTES_TO_CIPHER_LIST 161 #define SSL_F_SSL_CERT_DUP 221 @@ -2020,6 +2312,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_CREATE_CIPHER_LIST 166 #define SSL_F_SSL_CTRL 232 #define SSL_F_SSL_CTX_CHECK_PRIVATE_KEY 168 +#define SSL_F_SSL_CTX_MAKE_PROFILES 309 #define SSL_F_SSL_CTX_NEW 169 #define SSL_F_SSL_CTX_SET_CIPHER_LIST 269 #define SSL_F_SSL_CTX_SET_CLIENT_CERT_ENGINE 290 @@ -2042,14 +2335,17 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_GET_NEW_SESSION 181 #define SSL_F_SSL_GET_PREV_SESSION 217 #define SSL_F_SSL_GET_SERVER_SEND_CERT 182 +#define SSL_F_SSL_GET_SERVER_SEND_PKEY 317 #define SSL_F_SSL_GET_SIGN_PKEY 183 #define SSL_F_SSL_INIT_WBIO_BUFFER 184 #define SSL_F_SSL_LOAD_CLIENT_CA_FILE 185 #define SSL_F_SSL_NEW 186 #define SSL_F_SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT 300 #define SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT 302 +#define SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT 310 #define SSL_F_SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT 301 #define SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT 303 +#define SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT 311 #define SSL_F_SSL_PEEK 270 #define SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT 281 #define SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT 282 @@ -2058,6 +2354,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_RSA_PUBLIC_ENCRYPT 188 #define SSL_F_SSL_SESSION_NEW 189 #define SSL_F_SSL_SESSION_PRINT_FP 190 +#define SSL_F_SSL_SESSION_SET1_ID_CONTEXT 312 #define SSL_F_SSL_SESS_CERT_NEW 225 #define SSL_F_SSL_SET_CERT 191 #define SSL_F_SSL_SET_CIPHER_LIST 271 @@ -2071,6 +2368,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_SET_TRUST 228 #define SSL_F_SSL_SET_WFD 196 #define SSL_F_SSL_SHUTDOWN 224 +#define SSL_F_SSL_SRP_CTX_INIT 313 #define SSL_F_SSL_UNDEFINED_CONST_FUNCTION 243 #define SSL_F_SSL_UNDEFINED_FUNCTION 197 #define SSL_F_SSL_UNDEFINED_VOID_FUNCTION 244 @@ -2091,6 +2389,8 @@ void ERR_load_SSL_strings(void); #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 #define SSL_F_TLS1_ENC 210 +#define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 +#define SSL_F_TLS1_HEARTBEAT 315 #define SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT 275 #define SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT 276 #define SSL_F_TLS1_PRF 284 @@ -2130,6 +2430,13 @@ void ERR_load_SSL_strings(void); #define SSL_R_BAD_RSA_MODULUS_LENGTH 121 #define SSL_R_BAD_RSA_SIGNATURE 122 #define SSL_R_BAD_SIGNATURE 123 +#define SSL_R_BAD_SRP_A_LENGTH 347 +#define SSL_R_BAD_SRP_B_LENGTH 348 +#define SSL_R_BAD_SRP_G_LENGTH 349 +#define SSL_R_BAD_SRP_N_LENGTH 350 +#define SSL_R_BAD_SRP_S_LENGTH 351 +#define SSL_R_BAD_SRTP_MKI_VALUE 352 +#define SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST 353 #define SSL_R_BAD_SSL_FILETYPE 124 #define SSL_R_BAD_SSL_SESSION_ID_LENGTH 125 #define SSL_R_BAD_STATE 126 @@ -2137,12 +2444,15 @@ void ERR_load_SSL_strings(void); #define SSL_R_BIO_NOT_SET 128 #define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG 129 #define SSL_R_BN_LIB 130 +#define SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY 376 #define SSL_R_CA_DN_LENGTH_MISMATCH 131 #define SSL_R_CA_DN_TOO_LONG 132 #define SSL_R_CCS_RECEIVED_EARLY 133 #define SSL_R_CERTIFICATE_VERIFY_FAILED 134 #define SSL_R_CERT_LENGTH_MISMATCH 135 #define SSL_R_CHALLENGE_IS_DIFFERENT 136 +#define SSL_R_CHANNEL_ID_NOT_P256 375 +#define SSL_R_CHANNEL_ID_SIGNATURE_INVALID 371 #define SSL_R_CIPHER_CODE_WRONG_LENGTH 137 #define SSL_R_CIPHER_OR_HASH_UNAVAILABLE 138 #define SSL_R_CIPHER_TABLE_SRC_ERROR 139 @@ -2155,6 +2465,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_CONNECTION_ID_IS_DIFFERENT 143 #define SSL_R_CONNECTION_TYPE_NOT_SET 144 #define SSL_R_COOKIE_MISMATCH 308 +#define SSL_R_D2I_ECDSA_SIG 379 #define SSL_R_DATA_BETWEEN_CCS_AND_FINISHED 145 #define SSL_R_DATA_LENGTH_TOO_LONG 146 #define SSL_R_DECRYPTION_FAILED 147 @@ -2168,14 +2479,18 @@ void ERR_load_SSL_strings(void); #define SSL_R_ECC_CERT_SHOULD_HAVE_RSA_SIGNATURE 322 #define SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE 323 #define SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER 310 +#define SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST 354 #define SSL_R_ENCRYPTED_LENGTH_TOO_LONG 150 #define SSL_R_ERROR_GENERATING_TMP_RSA_KEY 282 #define SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST 151 +#define SSL_R_EVP_DIGESTSIGNFINAL_FAILED 377 +#define SSL_R_EVP_DIGESTSIGNINIT_FAILED 378 #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152 #define SSL_R_EXTRA_DATA_IN_MESSAGE 153 #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154 -#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 346 -#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 347 +#define SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS 372 +#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 355 +#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 356 #define SSL_R_HTTPS_PROXY_REQUEST 155 #define SSL_R_HTTP_REQUEST 156 #define SSL_R_ILLEGAL_PADDING 283 @@ -2183,7 +2498,9 @@ void ERR_load_SSL_strings(void); #define SSL_R_INVALID_CHALLENGE_LENGTH 158 #define SSL_R_INVALID_COMMAND 280 #define SSL_R_INVALID_COMPRESSION_ALGORITHM 341 +#define SSL_R_INVALID_MESSAGE 374 #define SSL_R_INVALID_PURPOSE 278 +#define SSL_R_INVALID_SRP_USERNAME 357 #define SSL_R_INVALID_STATUS_RESPONSE 328 #define SSL_R_INVALID_TICKET_KEYS_LENGTH 325 #define SSL_R_INVALID_TRUST 279 @@ -2213,11 +2530,13 @@ void ERR_load_SSL_strings(void); #define SSL_R_MISSING_RSA_CERTIFICATE 168 #define SSL_R_MISSING_RSA_ENCRYPTING_CERT 169 #define SSL_R_MISSING_RSA_SIGNING_CERT 170 +#define SSL_R_MISSING_SRP_PARAM 358 #define SSL_R_MISSING_TMP_DH_KEY 171 #define SSL_R_MISSING_TMP_ECDH_KEY 311 #define SSL_R_MISSING_TMP_RSA_KEY 172 #define SSL_R_MISSING_TMP_RSA_PKEY 173 #define SSL_R_MISSING_VERIFY_MESSAGE 174 +#define SSL_R_MULTIPLE_SGC_RESTARTS 346 #define SSL_R_NON_SSLV2_INITIAL_PACKET 175 #define SSL_R_NO_CERTIFICATES_RETURNED 176 #define SSL_R_NO_CERTIFICATE_ASSIGNED 177 @@ -2234,6 +2553,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_NO_COMPRESSION_SPECIFIED 187 #define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER 330 #define SSL_R_NO_METHOD_SPECIFIED 188 +#define SSL_R_NO_P256_SUPPORT 373 #define SSL_R_NO_PRIVATEKEY 189 #define SSL_R_NO_PRIVATE_KEY_ASSIGNED 190 #define SSL_R_NO_PROTOCOLS_AVAILABLE 191 @@ -2241,6 +2561,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_NO_RENEGOTIATION 339 #define SSL_R_NO_REQUIRED_DIGEST 324 #define SSL_R_NO_SHARED_CIPHER 193 +#define SSL_R_NO_SRTP_PROFILES 359 #define SSL_R_NO_VERIFY_CALLBACK 194 #define SSL_R_NULL_SSL_CTX 195 #define SSL_R_NULL_SSL_METHOD_PASSED 196 @@ -2285,7 +2606,12 @@ void ERR_load_SSL_strings(void); #define SSL_R_SESSION_ID_CONTEXT_UNINITIALIZED 277 #define SSL_R_SESSION_MAY_NOT_BE_CREATED 2000 #define SSL_R_SHORT_READ 219 +#define SSL_R_SIGNATURE_ALGORITHMS_ERROR 360 #define SSL_R_SIGNATURE_FOR_NON_SIGNING_CERTIFICATE 220 +#define SSL_R_SRP_A_CALC 361 +#define SSL_R_SRTP_COULD_NOT_ALLOCATE_PROFILES 362 +#define SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG 363 +#define SSL_R_SRTP_UNKNOWN_PROTECTION_PROFILE 364 #define SSL_R_SSL23_DOING_SESSION_ID_REUSE 221 #define SSL_R_SSL2_CONNECTION_ID_TOO_LONG 299 #define SSL_R_SSL3_EXT_INVALID_ECPOINTFORMAT 321 @@ -2330,6 +2656,9 @@ void ERR_load_SSL_strings(void); #define SSL_R_TLSV1_UNRECOGNIZED_NAME 1112 #define SSL_R_TLSV1_UNSUPPORTED_EXTENSION 1110 #define SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER 232 +#define SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT 365 +#define SSL_R_TLS_HEARTBEAT_PENDING 366 +#define SSL_R_TLS_ILLEGAL_EXPORTER_LABEL 367 #define SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST 157 #define SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST 233 #define SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG 234 @@ -2351,6 +2680,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_UNKNOWN_CERTIFICATE_TYPE 247 #define SSL_R_UNKNOWN_CIPHER_RETURNED 248 #define SSL_R_UNKNOWN_CIPHER_TYPE 249 +#define SSL_R_UNKNOWN_DIGEST 368 #define SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE 250 #define SSL_R_UNKNOWN_PKEY_TYPE 251 #define SSL_R_UNKNOWN_PROTOCOL 252 @@ -2365,16 +2695,19 @@ void ERR_load_SSL_strings(void); #define SSL_R_UNSUPPORTED_PROTOCOL 258 #define SSL_R_UNSUPPORTED_SSL_VERSION 259 #define SSL_R_UNSUPPORTED_STATUS_TYPE 329 +#define SSL_R_USE_SRTP_NOT_NEGOTIATED 369 #define SSL_R_WRITE_BIO_NOT_SET 260 #define SSL_R_WRONG_CIPHER_RETURNED 261 #define SSL_R_WRONG_MESSAGE_TYPE 262 #define SSL_R_WRONG_NUMBER_OF_KEY_BITS 263 #define SSL_R_WRONG_SIGNATURE_LENGTH 264 #define SSL_R_WRONG_SIGNATURE_SIZE 265 +#define SSL_R_WRONG_SIGNATURE_TYPE 370 #define SSL_R_WRONG_SSL_VERSION 266 #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 +#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/include/openssl/ssl2.h b/app/openssl/include/openssl/ssl2.h index 99a52ea0..eb25dcb0 100644 --- a/app/openssl/include/openssl/ssl2.h +++ b/app/openssl/include/openssl/ssl2.h @@ -155,6 +155,8 @@ extern "C" { #define CERT char #endif +#ifndef OPENSSL_NO_SSL_INTERN + typedef struct ssl2_state_st { int three_byte_header; @@ -219,6 +221,8 @@ typedef struct ssl2_state_st } tmp; } SSL2_STATE; +#endif + /* SSLv2 */ /* client */ #define SSL2_ST_SEND_CLIENT_HELLO_A (0x10|SSL_ST_CONNECT) diff --git a/app/openssl/include/openssl/ssl3.h b/app/openssl/include/openssl/ssl3.h index f9268c57..f205f73d 100644 --- a/app/openssl/include/openssl/ssl3.h +++ b/app/openssl/include/openssl/ssl3.h @@ -280,9 +280,6 @@ extern "C" { #define SSL3_RT_MAX_EXTRA (16384) -/* Default buffer length used for writen records. Thus a generated record - * will contain plaintext no larger than this value. */ -#define SSL3_RT_DEFAULT_PLAIN_LENGTH 2048 /* Maximum plaintext length: defined by SSL/TLS standards */ #define SSL3_RT_MAX_PLAIN_LENGTH 16384 /* Maximum compression overhead: defined by SSL/TLS standards */ @@ -314,13 +311,6 @@ extern "C" { #define SSL3_RT_MAX_PACKET_SIZE \ (SSL3_RT_MAX_ENCRYPTED_LENGTH+SSL3_RT_HEADER_LENGTH) -/* Extra space for empty fragment, headers, MAC, and padding. */ -#define SSL3_RT_DEFAULT_WRITE_OVERHEAD 256 -#define SSL3_RT_DEFAULT_PACKET_SIZE 4096 - SSL3_RT_DEFAULT_WRITE_OVERHEAD -#if SSL3_RT_DEFAULT_PLAIN_LENGTH + SSL3_RT_DEFAULT_WRITE_OVERHEAD > SSL3_RT_DEFAULT_PACKET_SIZE -#error "Insufficient space allocated for write buffers." -#endif - #define SSL3_MD_CLIENT_FINISHED_CONST "\x43\x4C\x4E\x54" #define SSL3_MD_SERVER_FINISHED_CONST "\x53\x52\x56\x52" @@ -332,6 +322,7 @@ extern "C" { #define SSL3_RT_ALERT 21 #define SSL3_RT_HANDSHAKE 22 #define SSL3_RT_APPLICATION_DATA 23 +#define TLS1_RT_HEARTBEAT 24 #define SSL3_AL_WARNING 1 #define SSL3_AL_FATAL 2 @@ -349,6 +340,11 @@ extern "C" { #define SSL3_AD_CERTIFICATE_UNKNOWN 46 #define SSL3_AD_ILLEGAL_PARAMETER 47 /* fatal */ +#define TLS1_HB_REQUEST 1 +#define TLS1_HB_RESPONSE 2 + +#ifndef OPENSSL_NO_SSL_INTERN + typedef struct ssl3_record_st { /*r */ int type; /* type of record */ @@ -370,6 +366,8 @@ typedef struct ssl3_buffer_st int left; /* how many bytes left */ } SSL3_BUFFER; +#endif + #define SSL3_CT_RSA_SIGN 1 #define SSL3_CT_DSS_SIGN 2 #define SSL3_CT_RSA_FIXED_DH 3 @@ -389,6 +387,24 @@ typedef struct ssl3_buffer_st #define SSL3_FLAGS_POP_BUFFER 0x0004 #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 +#define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */ +#define SSL3_FLAGS_CCS_OK 0x0080 + +/* SSL3_FLAGS_SGC_RESTART_DONE is set when we + * restart a handshake because of MS SGC and so prevents us + * from restarting the handshake in a loop. It's reset on a + * renegotiation, so effectively limits the client to one restart + * per negotiation. This limits the possibility of a DDoS + * attack where the client handshakes in a loop using SGC to + * restart. Servers which permit renegotiation can still be + * effected, but we can't prevent that. + */ +#define SSL3_FLAGS_SGC_RESTART_DONE 0x0040 + +#ifndef OPENSSL_NO_SSL_INTERN typedef struct ssl3_state_st { @@ -406,8 +422,8 @@ typedef struct ssl3_state_st unsigned char client_random[SSL3_RANDOM_SIZE]; /* flags for countermeasure against known-IV weakness */ - int need_empty_fragments; - int empty_fragment_done; + int need_record_splitting; + int record_split_done; /* The value of 'extra' when the buffers were initialized */ int init_extra; @@ -465,12 +481,6 @@ typedef struct ssl3_state_st void *server_opaque_prf_input; size_t server_opaque_prf_input_len; -#ifndef OPENSSL_NO_NEXTPROTONEG - /* Set if we saw the Next Protocol Negotiation extension from - our peer. */ - int next_proto_neg_seen; -#endif - struct { /* actually only needs to be 16+20 */ unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2]; @@ -480,7 +490,7 @@ typedef struct ssl3_state_st int finish_md_len; unsigned char peer_finish_md[EVP_MAX_MD_SIZE*2]; int peer_finish_md_len; - + unsigned long message_size; int message_type; @@ -528,14 +538,64 @@ typedef struct ssl3_state_st unsigned char previous_server_finished[EVP_MAX_MD_SIZE]; unsigned char previous_server_finished_len; int send_connection_binding; /* TODOEKR */ + +#ifndef OPENSSL_NO_NEXTPROTONEG + /* Set if we saw the Next Protocol Negotiation extension from our peer. */ + int next_proto_neg_seen; +#endif + +#ifndef OPENSSL_NO_TLSEXT +#ifndef OPENSSL_NO_EC + /* This is set to true if we believe that this is a version of Safari + * running on OS X 10.6 or newer. We wish to know this because Safari + * on 10.8 .. 10.8.3 has broken ECDHE-ECDSA support. */ + char is_probably_safari; +#endif /* !OPENSSL_NO_EC */ +#endif /* !OPENSSL_NO_TLSEXT */ + + /* In a client, this means that the server supported Channel ID and that + * a Channel ID was sent. In a server it means that we echoed support + * for Channel IDs and that tlsext_channel_id will be valid after the + * handshake. */ + char tlsext_channel_id_valid; + /* For a server: + * If |tlsext_channel_id_valid| is true, then this contains the + * verified Channel ID from the client: a P256 point, (x,y), where + * each are big-endian values. */ + unsigned char tlsext_channel_id[64]; + + /* ALPN information + * (we are in the process of transitioning from NPN to ALPN.) */ + + /* In a server these point to the selected ALPN protocol after the + * ClientHello has been processed. In a client these contain the + * protocol that the server selected once the ServerHello has been + * processed. */ + unsigned char *alpn_selected; + unsigned alpn_selected_len; + + /* These point to the digest function to use for signatures made with + * each type of public key. A NULL value indicates that the default + * digest should be used, which is SHA1 as of TLS 1.2. + * + * (These should be in the tmp member, but we have to put them here to + * ensure binary compatibility with earlier OpenSSL 1.0.* releases.) */ + const EVP_MD *digest_rsa; + const EVP_MD *digest_dsa; + const EVP_MD *digest_ecdsa; } SSL3_STATE; +#endif /* SSLv3 */ /*client */ /* extra state */ #define SSL3_ST_CW_FLUSH (0x100|SSL_ST_CONNECT) #define SSL3_ST_CUTTHROUGH_COMPLETE (0x101|SSL_ST_CONNECT) +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_ST_CW_WRITE_SOCK (0x310|SSL_ST_CONNECT) +#define DTLS1_SCTP_ST_CR_READ_SOCK (0x320|SSL_ST_CONNECT) +#endif /* write to server */ #define SSL3_ST_CW_CLNT_HELLO_A (0x110|SSL_ST_CONNECT) #define SSL3_ST_CW_CLNT_HELLO_B (0x111|SSL_ST_CONNECT) @@ -567,6 +627,8 @@ typedef struct ssl3_state_st #define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT) #define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT) #endif +#define SSL3_ST_CW_CHANNEL_ID_A (0x210|SSL_ST_CONNECT) +#define SSL3_ST_CW_CHANNEL_ID_B (0x211|SSL_ST_CONNECT) #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT) #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT) /* read from server */ @@ -582,6 +644,10 @@ typedef struct ssl3_state_st /* server */ /* extra state */ #define SSL3_ST_SW_FLUSH (0x100|SSL_ST_ACCEPT) +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_ST_SW_WRITE_SOCK (0x310|SSL_ST_ACCEPT) +#define DTLS1_SCTP_ST_SR_READ_SOCK (0x320|SSL_ST_ACCEPT) +#endif /* read from client */ /* Do not change the number values, they do matter */ #define SSL3_ST_SR_CLNT_HELLO_A (0x110|SSL_ST_ACCEPT) @@ -612,10 +678,13 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif +#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) +#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT) /* write to client */ @@ -643,6 +712,7 @@ typedef struct ssl3_state_st #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_MT_NEXT_PROTO 67 #endif +#define SSL3_MT_ENCRYPTED_EXTENSIONS 203 #define DTLS1_MT_HELLO_VERIFY_REQUEST 3 diff --git a/app/openssl/include/openssl/symhacks.h b/app/openssl/include/openssl/symhacks.h index 3fd4a816..bd2f000d 100644 --- a/app/openssl/include/openssl/symhacks.h +++ b/app/openssl/include/openssl/symhacks.h @@ -176,7 +176,6 @@ #define SSL_CTX_set_default_passwd_cb_userdata SSL_CTX_set_def_passwd_cb_ud #undef SSL_COMP_get_compression_methods #define SSL_COMP_get_compression_methods SSL_COMP_get_compress_methods - #undef ssl_add_clienthello_renegotiate_ext #define ssl_add_clienthello_renegotiate_ext ssl_add_clienthello_reneg_ext #undef ssl_add_serverhello_renegotiate_ext @@ -185,6 +184,32 @@ #define ssl_parse_clienthello_renegotiate_ext ssl_parse_clienthello_reneg_ext #undef ssl_parse_serverhello_renegotiate_ext #define ssl_parse_serverhello_renegotiate_ext ssl_parse_serverhello_reneg_ext +#undef SSL_srp_server_param_with_username +#define SSL_srp_server_param_with_username SSL_srp_server_param_with_un +#undef SSL_CTX_set_srp_client_pwd_callback +#define SSL_CTX_set_srp_client_pwd_callback SSL_CTX_set_srp_client_pwd_cb +#undef SSL_CTX_set_srp_verify_param_callback +#define SSL_CTX_set_srp_verify_param_callback SSL_CTX_set_srp_vfy_param_cb +#undef SSL_CTX_set_srp_username_callback +#define SSL_CTX_set_srp_username_callback SSL_CTX_set_srp_un_cb +#undef ssl_add_clienthello_use_srtp_ext +#define ssl_add_clienthello_use_srtp_ext ssl_add_clihello_use_srtp_ext +#undef ssl_add_serverhello_use_srtp_ext +#define ssl_add_serverhello_use_srtp_ext ssl_add_serhello_use_srtp_ext +#undef ssl_parse_clienthello_use_srtp_ext +#define ssl_parse_clienthello_use_srtp_ext ssl_parse_clihello_use_srtp_ext +#undef ssl_parse_serverhello_use_srtp_ext +#define ssl_parse_serverhello_use_srtp_ext ssl_parse_serhello_use_srtp_ext +#undef SSL_CTX_set_next_protos_advertised_cb +#define SSL_CTX_set_next_protos_advertised_cb SSL_CTX_set_next_protos_adv_cb +#undef SSL_CTX_set_next_proto_select_cb +#define SSL_CTX_set_next_proto_select_cb SSL_CTX_set_next_proto_sel_cb +#undef ssl3_cbc_record_digest_supported +#define ssl3_cbc_record_digest_supported ssl3_cbc_record_digest_support +#undef ssl_check_clienthello_tlsext_late +#define ssl_check_clienthello_tlsext_late ssl_check_clihello_tlsext_late +#undef ssl_check_clienthello_tlsext_early +#define ssl_check_clienthello_tlsext_early ssl_check_clihello_tlsext_early /* Hack some long ENGINE names */ #undef ENGINE_get_default_BN_mod_exp_crt @@ -238,6 +263,9 @@ #define EC_GROUP_get_point_conversion_form EC_GROUP_get_point_conv_form #undef EC_GROUP_clear_free_all_extra_data #define EC_GROUP_clear_free_all_extra_data EC_GROUP_clr_free_all_xtra_data +#undef EC_KEY_set_public_key_affine_coordinates +#define EC_KEY_set_public_key_affine_coordinates \ + EC_KEY_set_pub_key_aff_coords #undef EC_POINT_set_Jprojective_coordinates_GFp #define EC_POINT_set_Jprojective_coordinates_GFp \ EC_POINT_set_Jproj_coords_GFp @@ -294,8 +322,6 @@ #define ec_GFp_simple_point_set_to_infinity ec_GFp_simple_pt_set_to_inf #undef ec_GFp_simple_points_make_affine #define ec_GFp_simple_points_make_affine ec_GFp_simple_pts_make_affine -#undef ec_GFp_simple_group_get_curve_GFp -#define ec_GFp_simple_group_get_curve_GFp ec_GFp_simple_grp_get_curve_GFp #undef ec_GFp_simple_set_Jprojective_coordinates_GFp #define ec_GFp_simple_set_Jprojective_coordinates_GFp \ ec_GFp_smp_set_Jproj_coords_GFp @@ -399,6 +425,12 @@ #undef dtls1_retransmit_buffered_messages #define dtls1_retransmit_buffered_messages dtls1_retransmit_buffered_msgs +/* Hack some long SRP names */ +#undef SRP_generate_server_master_secret +#define SRP_generate_server_master_secret SRP_gen_server_master_secret +#undef SRP_generate_client_master_secret +#define SRP_generate_client_master_secret SRP_gen_client_master_secret + /* Hack some long UI names */ #undef UI_method_get_prompt_constructor #define UI_method_get_prompt_constructor UI_method_get_prompt_constructr diff --git a/app/openssl/include/openssl/tls1.h b/app/openssl/include/openssl/tls1.h index 76f368ac..ec8948d5 100644 --- a/app/openssl/include/openssl/tls1.h +++ b/app/openssl/include/openssl/tls1.h @@ -159,10 +159,24 @@ extern "C" { #define TLS1_ALLOW_EXPERIMENTAL_CIPHERSUITES 0 +#define TLS1_2_VERSION 0x0303 +#define TLS1_2_VERSION_MAJOR 0x03 +#define TLS1_2_VERSION_MINOR 0x03 + +#define TLS1_1_VERSION 0x0302 +#define TLS1_1_VERSION_MAJOR 0x03 +#define TLS1_1_VERSION_MINOR 0x02 + #define TLS1_VERSION 0x0301 #define TLS1_VERSION_MAJOR 0x03 #define TLS1_VERSION_MINOR 0x01 +#define TLS1_get_version(s) \ + ((s->version >> 8) == TLS1_VERSION_MAJOR ? s->version : 0) + +#define TLS1_get_client_version(s) \ + ((s->client_version >> 8) == TLS1_VERSION_MAJOR ? s->client_version : 0) + #define TLS1_AD_DECRYPTION_FAILED 21 #define TLS1_AD_RECORD_OVERFLOW 22 #define TLS1_AD_UNKNOWN_CA 48 /* fatal */ @@ -183,17 +197,51 @@ extern "C" { #define TLS1_AD_BAD_CERTIFICATE_HASH_VALUE 114 #define TLS1_AD_UNKNOWN_PSK_IDENTITY 115 /* fatal */ -/* ExtensionType values from RFC3546 / RFC4366 */ +/* ExtensionType values from RFC3546 / RFC4366 / RFC6066 */ #define TLSEXT_TYPE_server_name 0 #define TLSEXT_TYPE_max_fragment_length 1 #define TLSEXT_TYPE_client_certificate_url 2 #define TLSEXT_TYPE_trusted_ca_keys 3 #define TLSEXT_TYPE_truncated_hmac 4 #define TLSEXT_TYPE_status_request 5 +/* ExtensionType values from RFC4681 */ +#define TLSEXT_TYPE_user_mapping 6 + +/* ExtensionType values from RFC5878 */ +#define TLSEXT_TYPE_client_authz 7 +#define TLSEXT_TYPE_server_authz 8 + +/* ExtensionType values from RFC6091 */ +#define TLSEXT_TYPE_cert_type 9 + /* ExtensionType values from RFC4492 */ #define TLSEXT_TYPE_elliptic_curves 10 #define TLSEXT_TYPE_ec_point_formats 11 + +/* ExtensionType value from RFC5054 */ +#define TLSEXT_TYPE_srp 12 + +/* ExtensionType values from RFC5246 */ +#define TLSEXT_TYPE_signature_algorithms 13 + +/* ExtensionType value from RFC5764 */ +#define TLSEXT_TYPE_use_srtp 14 + +/* ExtensionType value from RFC5620 */ +#define TLSEXT_TYPE_heartbeat 15 + +/* ExtensionType value for TLS padding extension. + * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml + * http://tools.ietf.org/html/draft-agl-tls-padding-03 + */ +#define TLSEXT_TYPE_padding 21 + +/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */ +#define TLSEXT_TYPE_application_layer_protocol_negotiation 16 + +/* ExtensionType value from RFC4507 */ #define TLSEXT_TYPE_session_ticket 35 + /* ExtensionType value from draft-rescorla-tls-opaque-prf-input-00.txt */ #if 0 /* will have to be provided externally for now , * i.e. build with -DTLSEXT_TYPE_opaque_prf_input=38183 @@ -209,6 +257,9 @@ extern "C" { #define TLSEXT_TYPE_next_proto_neg 13172 #endif +/* This is not an IANA defined extension number */ +#define TLSEXT_TYPE_channel_id 30031 + /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 /* status request value from RFC 3546 */ @@ -221,12 +272,37 @@ extern "C" { #define TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2 2 #define TLSEXT_ECPOINTFORMAT_last 2 +/* Signature and hash algorithms from RFC 5246 */ + +#define TLSEXT_signature_anonymous 0 +#define TLSEXT_signature_rsa 1 +#define TLSEXT_signature_dsa 2 +#define TLSEXT_signature_ecdsa 3 + +#define TLSEXT_hash_none 0 +#define TLSEXT_hash_md5 1 +#define TLSEXT_hash_sha1 2 +#define TLSEXT_hash_sha224 3 +#define TLSEXT_hash_sha256 4 +#define TLSEXT_hash_sha384 5 +#define TLSEXT_hash_sha512 6 + #ifndef OPENSSL_NO_TLSEXT #define TLSEXT_MAXLEN_host_name 255 -const char *SSL_get_servername(const SSL *s, const int type) ; -int SSL_get_servername_type(const SSL *s) ; +const char *SSL_get_servername(const SSL *s, const int type); +int SSL_get_servername_type(const SSL *s); +/* SSL_export_keying_material exports a value derived from the master secret, + * as specified in RFC 5705. It writes |olen| bytes to |out| given a label and + * optional context. (Since a zero length context is allowed, the |use_context| + * flag controls whether a context is included.) + * + * It returns 1 on success and zero otherwise. + */ +int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, + const char *label, size_t llen, const unsigned char *p, size_t plen, + int use_context); #define SSL_set_tlsext_host_name(s,name) \ SSL_ctrl(s,SSL_CTRL_SET_TLSEXT_HOSTNAME,TLSEXT_NAMETYPE_host_name,(char *)name) @@ -290,6 +366,16 @@ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG, 0, arg) #define SSL_CTX_set_tlsext_ticket_key_cb(ssl, cb) \ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_TLSEXT_HB_ENABLED 0x01 +#define SSL_TLSEXT_HB_DONT_SEND_REQUESTS 0x02 +#define SSL_TLSEXT_HB_DONT_RECV_REQUESTS 0x04 + +#define SSL_get_tlsext_heartbeat_pending(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING,0,NULL) +#define SSL_set_tlsext_heartbeat_no_requests(ssl, arg) \ + SSL_ctrl((ssl),SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS,arg,NULL) +#endif #endif /* PSK ciphersuites from 4279 */ @@ -327,6 +413,14 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_AES_256_SHA 0x03000039 #define TLS1_CK_ADH_WITH_AES_256_SHA 0x0300003A +/* TLS v1.2 ciphersuites */ +#define TLS1_CK_RSA_WITH_NULL_SHA256 0x0300003B +#define TLS1_CK_RSA_WITH_AES_128_SHA256 0x0300003C +#define TLS1_CK_RSA_WITH_AES_256_SHA256 0x0300003D +#define TLS1_CK_DH_DSS_WITH_AES_128_SHA256 0x0300003E +#define TLS1_CK_DH_RSA_WITH_AES_128_SHA256 0x0300003F +#define TLS1_CK_DHE_DSS_WITH_AES_128_SHA256 0x03000040 + /* Camellia ciphersuites from RFC4132 */ #define TLS1_CK_RSA_WITH_CAMELLIA_128_CBC_SHA 0x03000041 #define TLS1_CK_DH_DSS_WITH_CAMELLIA_128_CBC_SHA 0x03000042 @@ -335,6 +429,16 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA 0x03000045 #define TLS1_CK_ADH_WITH_CAMELLIA_128_CBC_SHA 0x03000046 +/* TLS v1.2 ciphersuites */ +#define TLS1_CK_DHE_RSA_WITH_AES_128_SHA256 0x03000067 +#define TLS1_CK_DH_DSS_WITH_AES_256_SHA256 0x03000068 +#define TLS1_CK_DH_RSA_WITH_AES_256_SHA256 0x03000069 +#define TLS1_CK_DHE_DSS_WITH_AES_256_SHA256 0x0300006A +#define TLS1_CK_DHE_RSA_WITH_AES_256_SHA256 0x0300006B +#define TLS1_CK_ADH_WITH_AES_128_SHA256 0x0300006C +#define TLS1_CK_ADH_WITH_AES_256_SHA256 0x0300006D + +/* Camellia ciphersuites from RFC4132 */ #define TLS1_CK_RSA_WITH_CAMELLIA_256_CBC_SHA 0x03000084 #define TLS1_CK_DH_DSS_WITH_CAMELLIA_256_CBC_SHA 0x03000085 #define TLS1_CK_DH_RSA_WITH_CAMELLIA_256_CBC_SHA 0x03000086 @@ -350,6 +454,20 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_SEED_SHA 0x0300009A #define TLS1_CK_ADH_WITH_SEED_SHA 0x0300009B +/* TLS v1.2 GCM ciphersuites from RFC5288 */ +#define TLS1_CK_RSA_WITH_AES_128_GCM_SHA256 0x0300009C +#define TLS1_CK_RSA_WITH_AES_256_GCM_SHA384 0x0300009D +#define TLS1_CK_DHE_RSA_WITH_AES_128_GCM_SHA256 0x0300009E +#define TLS1_CK_DHE_RSA_WITH_AES_256_GCM_SHA384 0x0300009F +#define TLS1_CK_DH_RSA_WITH_AES_128_GCM_SHA256 0x030000A0 +#define TLS1_CK_DH_RSA_WITH_AES_256_GCM_SHA384 0x030000A1 +#define TLS1_CK_DHE_DSS_WITH_AES_128_GCM_SHA256 0x030000A2 +#define TLS1_CK_DHE_DSS_WITH_AES_256_GCM_SHA384 0x030000A3 +#define TLS1_CK_DH_DSS_WITH_AES_128_GCM_SHA256 0x030000A4 +#define TLS1_CK_DH_DSS_WITH_AES_256_GCM_SHA384 0x030000A5 +#define TLS1_CK_ADH_WITH_AES_128_GCM_SHA256 0x030000A6 +#define TLS1_CK_ADH_WITH_AES_256_GCM_SHA384 0x030000A7 + /* ECC ciphersuites from draft-ietf-tls-ecc-12.txt with changes soon to be in draft 13 */ #define TLS1_CK_ECDH_ECDSA_WITH_NULL_SHA 0x0300C001 #define TLS1_CK_ECDH_ECDSA_WITH_RC4_128_SHA 0x0300C002 @@ -381,6 +499,42 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_anon_WITH_AES_128_CBC_SHA 0x0300C018 #define TLS1_CK_ECDH_anon_WITH_AES_256_CBC_SHA 0x0300C019 +/* SRP ciphersuites from RFC 5054 */ +#define TLS1_CK_SRP_SHA_WITH_3DES_EDE_CBC_SHA 0x0300C01A +#define TLS1_CK_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA 0x0300C01B +#define TLS1_CK_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA 0x0300C01C +#define TLS1_CK_SRP_SHA_WITH_AES_128_CBC_SHA 0x0300C01D +#define TLS1_CK_SRP_SHA_RSA_WITH_AES_128_CBC_SHA 0x0300C01E +#define TLS1_CK_SRP_SHA_DSS_WITH_AES_128_CBC_SHA 0x0300C01F +#define TLS1_CK_SRP_SHA_WITH_AES_256_CBC_SHA 0x0300C020 +#define TLS1_CK_SRP_SHA_RSA_WITH_AES_256_CBC_SHA 0x0300C021 +#define TLS1_CK_SRP_SHA_DSS_WITH_AES_256_CBC_SHA 0x0300C022 + +/* ECDH HMAC based ciphersuites from RFC5289 */ + +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_SHA256 0x0300C023 +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_256_SHA384 0x0300C024 +#define TLS1_CK_ECDH_ECDSA_WITH_AES_128_SHA256 0x0300C025 +#define TLS1_CK_ECDH_ECDSA_WITH_AES_256_SHA384 0x0300C026 +#define TLS1_CK_ECDHE_RSA_WITH_AES_128_SHA256 0x0300C027 +#define TLS1_CK_ECDHE_RSA_WITH_AES_256_SHA384 0x0300C028 +#define TLS1_CK_ECDH_RSA_WITH_AES_128_SHA256 0x0300C029 +#define TLS1_CK_ECDH_RSA_WITH_AES_256_SHA384 0x0300C02A + +/* ECDH GCM based ciphersuites from RFC5289 */ +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 0x0300C02B +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 0x0300C02C +#define TLS1_CK_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 0x0300C02D +#define TLS1_CK_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 0x0300C02E +#define TLS1_CK_ECDHE_RSA_WITH_AES_128_GCM_SHA256 0x0300C02F +#define TLS1_CK_ECDHE_RSA_WITH_AES_256_GCM_SHA384 0x0300C030 +#define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 +#define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 + +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 + /* XXX * Inconsistency alert: * The OpenSSL names of ciphers with ephemeral DH here include the string @@ -448,6 +602,17 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_PSK_WITH_AES_128_CBC_SHA "PSK-AES128-CBC-SHA" #define TLS1_TXT_PSK_WITH_AES_256_CBC_SHA "PSK-AES256-CBC-SHA" +/* SRP ciphersuite from RFC 5054 */ +#define TLS1_TXT_SRP_SHA_WITH_3DES_EDE_CBC_SHA "SRP-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA "SRP-RSA-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA "SRP-DSS-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_WITH_AES_128_CBC_SHA "SRP-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_AES_128_CBC_SHA "SRP-RSA-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_AES_128_CBC_SHA "SRP-DSS-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_WITH_AES_256_CBC_SHA "SRP-AES-256-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_AES_256_CBC_SHA "SRP-RSA-AES-256-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_AES_256_CBC_SHA "SRP-DSS-AES-256-CBC-SHA" + /* Camellia ciphersuites from RFC4132 */ #define TLS1_TXT_RSA_WITH_CAMELLIA_128_CBC_SHA "CAMELLIA128-SHA" #define TLS1_TXT_DH_DSS_WITH_CAMELLIA_128_CBC_SHA "DH-DSS-CAMELLIA128-SHA" @@ -471,6 +636,59 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_DHE_RSA_WITH_SEED_SHA "DHE-RSA-SEED-SHA" #define TLS1_TXT_ADH_WITH_SEED_SHA "ADH-SEED-SHA" +/* TLS v1.2 ciphersuites */ +#define TLS1_TXT_RSA_WITH_NULL_SHA256 "NULL-SHA256" +#define TLS1_TXT_RSA_WITH_AES_128_SHA256 "AES128-SHA256" +#define TLS1_TXT_RSA_WITH_AES_256_SHA256 "AES256-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_128_SHA256 "DH-DSS-AES128-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_128_SHA256 "DH-RSA-AES128-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_128_SHA256 "DHE-DSS-AES128-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_128_SHA256 "DHE-RSA-AES128-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_256_SHA256 "DH-DSS-AES256-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_256_SHA256 "DH-RSA-AES256-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_256_SHA256 "DHE-DSS-AES256-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_256_SHA256 "DHE-RSA-AES256-SHA256" +#define TLS1_TXT_ADH_WITH_AES_128_SHA256 "ADH-AES128-SHA256" +#define TLS1_TXT_ADH_WITH_AES_256_SHA256 "ADH-AES256-SHA256" + +/* TLS v1.2 GCM ciphersuites from RFC5288 */ +#define TLS1_TXT_RSA_WITH_AES_128_GCM_SHA256 "AES128-GCM-SHA256" +#define TLS1_TXT_RSA_WITH_AES_256_GCM_SHA384 "AES256-GCM-SHA384" +#define TLS1_TXT_DHE_RSA_WITH_AES_128_GCM_SHA256 "DHE-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_256_GCM_SHA384 "DHE-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_DH_RSA_WITH_AES_128_GCM_SHA256 "DH-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_256_GCM_SHA384 "DH-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_DHE_DSS_WITH_AES_128_GCM_SHA256 "DHE-DSS-AES128-GCM-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_256_GCM_SHA384 "DHE-DSS-AES256-GCM-SHA384" +#define TLS1_TXT_DH_DSS_WITH_AES_128_GCM_SHA256 "DH-DSS-AES128-GCM-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_256_GCM_SHA384 "DH-DSS-AES256-GCM-SHA384" +#define TLS1_TXT_ADH_WITH_AES_128_GCM_SHA256 "ADH-AES128-GCM-SHA256" +#define TLS1_TXT_ADH_WITH_AES_256_GCM_SHA384 "ADH-AES256-GCM-SHA384" + +/* ECDH HMAC based ciphersuites from RFC5289 */ + +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_SHA256 "ECDHE-ECDSA-AES128-SHA256" +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_SHA384 "ECDHE-ECDSA-AES256-SHA384" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_128_SHA256 "ECDH-ECDSA-AES128-SHA256" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_256_SHA384 "ECDH-ECDSA-AES256-SHA384" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_128_SHA256 "ECDHE-RSA-AES128-SHA256" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_256_SHA384 "ECDHE-RSA-AES256-SHA384" +#define TLS1_TXT_ECDH_RSA_WITH_AES_128_SHA256 "ECDH-RSA-AES128-SHA256" +#define TLS1_TXT_ECDH_RSA_WITH_AES_256_SHA384 "ECDH-RSA-AES256-SHA384" + +/* ECDH GCM based ciphersuites from RFC5289 */ +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 "ECDHE-ECDSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 "ECDHE-ECDSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 "ECDH-ECDSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 "ECDH-ECDSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_128_GCM_SHA256 "ECDHE-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_256_GCM_SHA384 "ECDHE-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" + +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openssl/include/openssl/ts.h b/app/openssl/include/openssl/ts.h index 190e8a1b..c2448e3c 100644 --- a/app/openssl/include/openssl/ts.h +++ b/app/openssl/include/openssl/ts.h @@ -86,9 +86,6 @@ #include #endif -#include - - #ifdef __cplusplus extern "C" { #endif diff --git a/app/openssl/include/openssl/ui.h b/app/openssl/include/openssl/ui.h index 2b1cfa22..bd78aa41 100644 --- a/app/openssl/include/openssl/ui.h +++ b/app/openssl/include/openssl/ui.h @@ -316,7 +316,7 @@ int (*UI_method_get_writer(UI_METHOD *method))(UI*,UI_STRING*); int (*UI_method_get_flusher(UI_METHOD *method))(UI*); int (*UI_method_get_reader(UI_METHOD *method))(UI*,UI_STRING*); int (*UI_method_get_closer(UI_METHOD *method))(UI*); -char* (*UI_method_get_prompt_constructor(UI_METHOD *method))(UI*, const char*, const char*); +char * (*UI_method_get_prompt_constructor(UI_METHOD *method))(UI*, const char*, const char*); /* The following functions are helpers for method writers to access relevant data from a UI_STRING. */ diff --git a/app/openssl/include/openssl/x509.h b/app/openssl/include/openssl/x509.h index e6f8a403..092dd745 100644 --- a/app/openssl/include/openssl/x509.h +++ b/app/openssl/include/openssl/x509.h @@ -657,11 +657,15 @@ int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey); int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki); +int X509_signature_dump(BIO *bp,const ASN1_STRING *sig, int indent); int X509_signature_print(BIO *bp,X509_ALGOR *alg, ASN1_STRING *sig); int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_pubkey_digest(const X509 *data,const EVP_MD *type, @@ -763,6 +767,7 @@ X509_ALGOR *X509_ALGOR_dup(X509_ALGOR *xn); int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, void *pval); void X509_ALGOR_get0(ASN1_OBJECT **paobj, int *pptype, void **ppval, X509_ALGOR *algor); +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); X509_NAME *X509_NAME_dup(X509_NAME *xn); X509_NAME_ENTRY *X509_NAME_ENTRY_dup(X509_NAME_ENTRY *ne); @@ -896,6 +901,9 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *algor1, int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, ASN1_BIT_STRING *signature, void *data, EVP_PKEY *pkey, const EVP_MD *type); +int ASN1_item_sign_ctx(const ASN1_ITEM *it, + X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, void *asn, EVP_MD_CTX *ctx); #endif int X509_set_version(X509 *x,long version); @@ -1161,6 +1169,9 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, unsigned char *salt, int saltlen, unsigned char *aiv, int prf_nid); +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen); + /* PKCS#8 utilities */ DECLARE_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO) diff --git a/app/openssl/ndk-build-clear.mk b/app/openssl/ndk-build-clear.mk new file mode 100644 index 00000000..0d7404a1 --- /dev/null +++ b/app/openssl/ndk-build-clear.mk @@ -0,0 +1,14 @@ +LOCAL_SRC_FILES_x86 := +LOCAL_CFLAGS_x86 := + +LOCAL_SRC_FILES_arm64 := +LOCAL_CFLAGS_arm64 := + +LOCAL_SRC_FILES_arm := +LOCAL_CFLAGS_arm := + +LOCAL_SRC_FILES_x86_64 := +LOCAL_CFLAGS_x86_64 := + +LOCAL_SRC_FILES_mips := +LOCAL_CFLAGS_mips := diff --git a/app/openssl/ndk-build.mk b/app/openssl/ndk-build.mk new file mode 100644 index 00000000..50372e92 --- /dev/null +++ b/app/openssl/ndk-build.mk @@ -0,0 +1,3 @@ +LOCAL_SRC_FILES := $(LOCAL_SRC_FILES_$(TARGET_ARCH)) +LOCAL_CFLAGS += $(LOCAL_CFLAGS_$(TARGET_ARCH)) +LOCAL_CFLAGS += $(LOCAL_CFLAGS_32) diff --git a/app/openssl/openssl.config b/app/openssl/openssl.config index 293c744c..aa028705 100644 --- a/app/openssl/openssl.config +++ b/app/openssl/openssl.config @@ -1,7 +1,41 @@ CONFIGURE_ARGS="\ -linux-generic32 \ -no-idea no-cast no-seed no-md2 no-sha0 no-whrlpool \ -DL_ENDIAN \ +no-camellia \ +no-capieng \ +no-cast \ +no-dtls1 \ +no-gost \ +no-gmp \ +no-heartbeats \ +no-idea \ +no-jpake \ +no-md2 \ +no-mdc2 \ +no-rc5 \ +no-rdrand \ +no-rfc3779 \ +no-rsax \ +no-sctp \ +no-seed \ +no-sha0 \ +no-static_engine \ +no-whirlpool \ +no-zlib \ +" + +# configure arguments specific for 32-bit arch +CONFIGURE_ARGS_32="\ +linux-generic32 \ +" + +# configure arguments specific for 64-bit arch +CONFIGURE_ARGS_64="\ +linux-generic64 \ +" + +# configure arguments specific for static build +CONFIGURE_ARGS_STATIC="\ +no-dso \ " # unneeded directories @@ -10,12 +44,12 @@ MacOS \ Netware \ VMS \ apps/demoCA \ +apps/demoSRP \ apps/set \ bugs \ certs \ crypto/camellia \ crypto/cast \ -crypto/cms \ crypto/idea \ crypto/md2 \ crypto/rc5 \ @@ -60,6 +94,7 @@ README.ASN1 \ README.ENGINE \ apps/CA.com \ apps/Makefile \ +apps/Makefile.save \ apps/install-apps.com \ apps/makeapps.com \ apps/openssl-vms.cnf \ @@ -68,12 +103,15 @@ apps/tsget \ apps/vms_decc_init.c \ config \ crypto/LPdir_vms.c \ -crypto/LPdir_win32.c \ crypto/Makefile \ +crypto/Makefile.save \ crypto/aes/Makefile \ +crypto/aes/Makefile.save \ crypto/asn1/Makefile \ +crypto/asn1/Makefile.save \ crypto/bf/INSTALL \ crypto/bf/Makefile \ +crypto/bf/Makefile.save \ crypto/bf/README \ crypto/bf/VERSION \ crypto/bf/asm/readme \ @@ -83,64 +121,117 @@ crypto/bf/bfs.cpp \ crypto/bf/bfspeed.c \ crypto/bf/bftest.c \ crypto/bio/Makefile \ +crypto/bio/Makefile.save \ crypto/bio/bss_rtcp.c \ crypto/bn/Makefile \ +crypto/bn/Makefile.save \ crypto/bn/asm/vms.mar \ +crypto/bn/bn_x931p.c \ crypto/bn/vms-helper.c \ crypto/buffer/Makefile \ +crypto/buffer/Makefile.save \ +crypto/cmac/Makefile \ +crypto/cmac/Makefile.save \ +crypto/cms/Makefile \ +crypto/cms/Makefile.save \ crypto/comp/Makefile \ +crypto/comp/Makefile.save \ crypto/conf/Makefile \ +crypto/conf/Makefile.save \ crypto/crypto-lib.com \ crypto/des/Makefile \ +crypto/des/Makefile.save \ crypto/des/des-lib.com \ crypto/dh/Makefile \ +crypto/dh/Makefile.save \ crypto/dh/dh_prn.c \ crypto/dsa/Makefile \ +crypto/dsa/Makefile.save \ crypto/dso/Makefile \ +crypto/dso/Makefile.save \ crypto/dso/dso_beos.c \ crypto/dso/dso_vms.c \ crypto/dso/dso_win32.c \ crypto/ec/Makefile \ +crypto/ec/Makefile.save \ +crypto/ec/ecp_nistp224.c \ +crypto/ec/ecp_nistp256.c \ +crypto/ec/ecp_nistp521.c \ +crypto/ec/ecp_nistputil.c \ crypto/ecdh/Makefile \ +crypto/ecdh/Makefile.save \ crypto/ecdsa/Makefile \ +crypto/ecdsa/Makefile.save \ crypto/engine/Makefile \ -crypto/engine/tb_asnmth.c \ -crypto/engine/tb_pkmeth.c \ +crypto/engine/Makefile.save \ +crypto/engine/eng_rdrand.c \ +crypto/engine/eng_rsax.c \ crypto/err/Makefile \ +crypto/err/Makefile.save \ crypto/evp/Makefile \ +crypto/evp/Makefile.save \ +crypto/evp/evp_fips.c \ crypto/evp/m_md2.c \ crypto/evp/m_sha.c \ +crypto/fips_err.h \ +crypto/fips_ers.c \ crypto/hmac/Makefile \ +crypto/hmac/Makefile.save \ crypto/install-crypto.com \ crypto/jpake/Makefile \ crypto/krb5/Makefile \ +crypto/krb5/Makefile.save \ crypto/lhash/Makefile \ +crypto/lhash/Makefile.save \ crypto/md4/Makefile \ +crypto/md4/Makefile.save \ crypto/md5/Makefile \ +crypto/md5/Makefile.save \ crypto/mdc2/Makefile \ +crypto/mdc2/Makefile.save \ crypto/modes/Makefile \ +crypto/modes/Makefile.save \ crypto/modes/cts128.c \ crypto/modes/modes.h \ +crypto/o_fips.c \ crypto/objects/Makefile \ +crypto/objects/Makefile.save \ crypto/ocsp/Makefile \ +crypto/ocsp/Makefile.save \ crypto/pem/Makefile \ +crypto/pem/Makefile.save \ crypto/pkcs12/Makefile \ +crypto/pkcs12/Makefile.save \ crypto/pkcs7/Makefile \ +crypto/pkcs7/Makefile.save \ crypto/pkcs7/bio_pk7.c \ +crypto/ppccap.c \ crypto/pqueue/Makefile \ +crypto/pqueue/Makefile.save \ crypto/rand/Makefile \ +crypto/rand/Makefile.save \ crypto/rand/rand_vms.c \ crypto/rc2/Makefile \ +crypto/rc2/Makefile.save \ crypto/rc4/Makefile \ +crypto/rc4/Makefile.save \ crypto/ripemd/Makefile \ +crypto/ripemd/Makefile.save \ crypto/rsa/Makefile \ +crypto/rsa/Makefile.save \ crypto/sha/Makefile \ +crypto/sha/Makefile.save \ crypto/sha/sha_one.c \ +crypto/srp/Makefile \ +crypto/srp/Makefile.save \ +crypto/srp/srptest.c \ crypto/stack/Makefile \ +crypto/stack/Makefile.save \ crypto/store/Makefile \ crypto/threads/pthreads-vms.com \ crypto/threads/win32.bat \ crypto/ts/Makefile \ +crypto/ts/Makefile.save \ crypto/ts/ts.h \ crypto/ts/ts_asn1.c \ crypto/ts/ts_conf.c \ @@ -153,13 +244,16 @@ crypto/ts/ts_rsp_utils.c \ crypto/ts/ts_rsp_verify.c \ crypto/ts/ts_verify_ctx.c \ crypto/txt_db/Makefile \ +crypto/txt_db/Makefile.save \ crypto/ui/Makefile \ +crypto/ui/Makefile.save \ crypto/vms_rms.h crypto/x509/Makefile \ +crypto/x509/Makefile.save \ crypto/x509v3/Makefile \ +crypto/x509v3/Makefile.save \ include/openssl/camellia.h \ include/openssl/cast.h \ -include/openssl/cms.h \ include/openssl/idea.h \ include/openssl/mdc2.h \ include/openssl/seed.h \ @@ -168,6 +262,8 @@ install.com \ makevms.com \ openssl.doxy \ openssl.spec \ +ssl/Makefile \ +ssl/Makefile.save \ ssl/install-ssl.com \ ssl/ssl-lib.com \ ssl/ssl_task.c \ @@ -182,78 +278,827 @@ ssl \ include \ " -OPENSSL_PATCHES="\ -progs.patch \ -small_records.patch \ -handshake_cutthrough.patch \ -jsse.patch \ -npn.patch \ -sslv3_uninit_padding.patch \ -sha1_armv4_large.patch \ +# Arch-specific compiler defines for crypto/ library. +# +OPENSSL_CRYPTO_DEFINES="\ +NO_WINDOWS_BRAINDEATH \ " -OPENSSL_PATCHES_progs_SOURCES="\ -apps/openssl.c \ -apps/progs.h \ -apps/speed.c \ +OPENSSL_CRYPTO_DEFINES_arm="\ +OPENSSL_BN_ASM_GF2m \ +OPENSSL_BN_ASM_MONT \ +OPENSSL_CPUID_OBJ \ +GHASH_ASM \ +AES_ASM \ +BSAES_ASM \ +SHA1_ASM \ +SHA256_ASM \ +SHA512_ASM \ +" + +OPENSSL_CRYPTO_DEFINES_arm64="\ +OPENSSL_NO_ASM \ +" + +OPENSSL_CRYPTO_DEFINES_mips="\ +OPENSSL_BN_ASM_MONT \ +AES_ASM \ +SHA1_ASM \ +SHA256_ASM \ +" + +OPENSSL_CRYPTO_DEFINES_x86="\ +OPENSSL_IA32_SSE2 \ +OPENSSL_BN_ASM_GF2m \ +OPENSSL_BN_ASM_MONT \ +OPENSSL_BN_ASM_PART_WORDS \ +AES_ASM \ +VPAES_ASM \ +GHASH_ASM \ +SHA1_ASM \ +SHA256_ASM \ +SHA512_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \ +" + +OPENSSL_CRYPTO_DEFINES_x86_64="\ +OPENSSL_BN_ASM_GF2m \ +OPENSSL_BN_ASM_MONT \ +OPENSSL_BN_ASM_MONT5 \ +AES_ASM \ +VPAES_ASM \ +BSAES_ASM \ +GHASH_ASM \ +SHA1_ASM \ +SHA256_ASM \ +SHA512_ASM \ +MD5_ASM \ +DES_PTR \ +DES_RISC1 \ +DES_UNROLL \ +OPENSSL_CPUID_OBJ \ +" + +OPENSSL_CRYPTO_INCLUDES="\ +. \ +include \ +crypto \ +crypto/asn1 \ +crypto/evp \ +crypto/modes \ +include \ +include/openssl \ +" + +OPENSSL_CRYPTO_SOURCES="\ +crypto/cryptlib.c \ +crypto/mem.c \ +crypto/mem_clr.c \ +crypto/mem_dbg.c \ +crypto/cversion.c \ +crypto/ex_data.c \ +crypto/cpt_err.c \ +crypto/ebcdic.c \ +crypto/uid.c \ +crypto/o_time.c \ +crypto/o_str.c \ +crypto/o_dir.c \ +crypto/aes/aes_cbc.c \ +crypto/aes/aes_cfb.c \ +crypto/aes/aes_core.c \ +crypto/aes/aes_ctr.c \ +crypto/aes/aes_ecb.c \ +crypto/aes/aes_misc.c \ +crypto/aes/aes_ofb.c \ +crypto/aes/aes_wrap.c \ +crypto/asn1/a_bitstr.c \ +crypto/asn1/a_bool.c \ +crypto/asn1/a_bytes.c \ +crypto/asn1/a_d2i_fp.c \ +crypto/asn1/a_digest.c \ +crypto/asn1/a_dup.c \ +crypto/asn1/a_enum.c \ +crypto/asn1/a_gentm.c \ +crypto/asn1/a_i2d_fp.c \ +crypto/asn1/a_int.c \ +crypto/asn1/a_mbstr.c \ +crypto/asn1/a_object.c \ +crypto/asn1/a_octet.c \ +crypto/asn1/a_print.c \ +crypto/asn1/a_set.c \ +crypto/asn1/a_sign.c \ +crypto/asn1/a_strex.c \ +crypto/asn1/a_strnid.c \ +crypto/asn1/a_time.c \ +crypto/asn1/a_type.c \ +crypto/asn1/a_utctm.c \ +crypto/asn1/a_utf8.c \ +crypto/asn1/a_verify.c \ +crypto/asn1/ameth_lib.c \ +crypto/asn1/asn1_err.c \ +crypto/asn1/asn1_gen.c \ +crypto/asn1/asn1_lib.c \ +crypto/asn1/asn1_par.c \ +crypto/asn1/asn_mime.c \ +crypto/asn1/asn_moid.c \ +crypto/asn1/asn_pack.c \ +crypto/asn1/bio_asn1.c \ +crypto/asn1/bio_ndef.c \ +crypto/asn1/d2i_pr.c \ +crypto/asn1/d2i_pu.c \ +crypto/asn1/evp_asn1.c \ +crypto/asn1/f_enum.c \ +crypto/asn1/f_int.c \ +crypto/asn1/f_string.c \ +crypto/asn1/i2d_pr.c \ +crypto/asn1/i2d_pu.c \ +crypto/asn1/n_pkey.c \ +crypto/asn1/nsseq.c \ +crypto/asn1/p5_pbe.c \ +crypto/asn1/p5_pbev2.c \ +crypto/asn1/p8_pkey.c \ +crypto/asn1/t_bitst.c \ +crypto/asn1/t_crl.c \ +crypto/asn1/t_pkey.c \ +crypto/asn1/t_req.c \ +crypto/asn1/t_spki.c \ +crypto/asn1/t_x509.c \ +crypto/asn1/t_x509a.c \ +crypto/asn1/tasn_dec.c \ +crypto/asn1/tasn_enc.c \ +crypto/asn1/tasn_fre.c \ +crypto/asn1/tasn_new.c \ +crypto/asn1/tasn_prn.c \ +crypto/asn1/tasn_typ.c \ +crypto/asn1/tasn_utl.c \ +crypto/asn1/x_algor.c \ +crypto/asn1/x_attrib.c \ +crypto/asn1/x_bignum.c \ +crypto/asn1/x_crl.c \ +crypto/asn1/x_exten.c \ +crypto/asn1/x_info.c \ +crypto/asn1/x_long.c \ +crypto/asn1/x_name.c \ +crypto/asn1/x_nx509.c \ +crypto/asn1/x_pkey.c \ +crypto/asn1/x_pubkey.c \ +crypto/asn1/x_req.c \ +crypto/asn1/x_sig.c \ +crypto/asn1/x_spki.c \ +crypto/asn1/x_val.c \ +crypto/asn1/x_x509.c \ +crypto/asn1/x_x509a.c \ +crypto/bf/bf_cfb64.c \ +crypto/bf/bf_ecb.c \ +crypto/bf/bf_enc.c \ +crypto/bf/bf_ofb64.c \ +crypto/bf/bf_skey.c \ +crypto/bio/b_dump.c \ +crypto/bio/b_print.c \ +crypto/bio/b_sock.c \ +crypto/bio/bf_buff.c \ +crypto/bio/bf_nbio.c \ +crypto/bio/bf_null.c \ +crypto/bio/bio_cb.c \ +crypto/bio/bio_err.c \ +crypto/bio/bio_lib.c \ +crypto/bio/bss_acpt.c \ +crypto/bio/bss_bio.c \ +crypto/bio/bss_conn.c \ +crypto/bio/bss_dgram.c \ +crypto/bio/bss_fd.c \ +crypto/bio/bss_file.c \ +crypto/bio/bss_log.c \ +crypto/bio/bss_mem.c \ +crypto/bio/bss_null.c \ +crypto/bio/bss_sock.c \ +crypto/bn/bn_add.c \ +crypto/bn/bn_asm.c \ +crypto/bn/bn_blind.c \ +crypto/bn/bn_const.c \ +crypto/bn/bn_ctx.c \ +crypto/bn/bn_div.c \ +crypto/bn/bn_err.c \ +crypto/bn/bn_exp.c \ +crypto/bn/bn_exp2.c \ +crypto/bn/bn_gcd.c \ +crypto/bn/bn_gf2m.c \ +crypto/bn/bn_kron.c \ +crypto/bn/bn_lib.c \ +crypto/bn/bn_mod.c \ +crypto/bn/bn_mont.c \ +crypto/bn/bn_mpi.c \ +crypto/bn/bn_mul.c \ +crypto/bn/bn_nist.c \ +crypto/bn/bn_prime.c \ +crypto/bn/bn_print.c \ +crypto/bn/bn_rand.c \ +crypto/bn/bn_recp.c \ +crypto/bn/bn_shift.c \ +crypto/bn/bn_sqr.c \ +crypto/bn/bn_sqrt.c \ +crypto/bn/bn_word.c \ +crypto/buffer/buf_err.c \ +crypto/buffer/buf_str.c \ +crypto/buffer/buffer.c \ +crypto/cmac/cm_ameth.c \ +crypto/cmac/cm_pmeth.c \ +crypto/cmac/cmac.c \ +crypto/cms/cms_asn1.c \ +crypto/cms/cms_att.c \ +crypto/cms/cms_cd.c \ +crypto/cms/cms_dd.c \ +crypto/cms/cms_enc.c \ +crypto/cms/cms_env.c \ +crypto/cms/cms_err.c \ +crypto/cms/cms_ess.c \ +crypto/cms/cms_io.c \ +crypto/cms/cms_lib.c \ +crypto/cms/cms_pwri.c \ +crypto/cms/cms_sd.c \ +crypto/cms/cms_smime.c \ +crypto/comp/c_rle.c \ +crypto/comp/c_zlib.c \ +crypto/comp/comp_err.c \ +crypto/comp/comp_lib.c \ +crypto/conf/conf_api.c \ +crypto/conf/conf_def.c \ +crypto/conf/conf_err.c \ +crypto/conf/conf_lib.c \ +crypto/conf/conf_mall.c \ +crypto/conf/conf_mod.c \ +crypto/conf/conf_sap.c \ +crypto/des/cbc_cksm.c \ +crypto/des/cbc_enc.c \ +crypto/des/cfb64ede.c \ +crypto/des/cfb64enc.c \ +crypto/des/cfb_enc.c \ +crypto/des/des_enc.c \ +crypto/des/des_old.c \ +crypto/des/des_old2.c \ +crypto/des/ecb3_enc.c \ +crypto/des/ecb_enc.c \ +crypto/des/ede_cbcm_enc.c \ +crypto/des/enc_read.c \ +crypto/des/enc_writ.c \ +crypto/des/fcrypt.c \ +crypto/des/fcrypt_b.c \ +crypto/des/ofb64ede.c \ +crypto/des/ofb64enc.c \ +crypto/des/ofb_enc.c \ +crypto/des/pcbc_enc.c \ +crypto/des/qud_cksm.c \ +crypto/des/rand_key.c \ +crypto/des/read2pwd.c \ +crypto/des/rpc_enc.c \ +crypto/des/set_key.c \ +crypto/des/str2key.c \ +crypto/des/xcbc_enc.c \ +crypto/dh/dh_ameth.c \ +crypto/dh/dh_asn1.c \ +crypto/dh/dh_check.c \ +crypto/dh/dh_depr.c \ +crypto/dh/dh_err.c \ +crypto/dh/dh_gen.c \ +crypto/dh/dh_key.c \ +crypto/dh/dh_lib.c \ +crypto/dh/dh_pmeth.c \ +crypto/dsa/dsa_ameth.c \ +crypto/dsa/dsa_asn1.c \ +crypto/dsa/dsa_depr.c \ +crypto/dsa/dsa_err.c \ +crypto/dsa/dsa_gen.c \ +crypto/dsa/dsa_key.c \ +crypto/dsa/dsa_lib.c \ +crypto/dsa/dsa_ossl.c \ +crypto/dsa/dsa_pmeth.c \ +crypto/dsa/dsa_prn.c \ +crypto/dsa/dsa_sign.c \ +crypto/dsa/dsa_vrf.c \ +crypto/dso/dso_dl.c \ +crypto/dso/dso_dlfcn.c \ +crypto/dso/dso_err.c \ +crypto/dso/dso_lib.c \ +crypto/dso/dso_null.c \ +crypto/dso/dso_openssl.c \ +crypto/ec/ec2_mult.c \ +crypto/ec/ec2_oct.c \ +crypto/ec/ec2_smpl.c \ +crypto/ec/ec_ameth.c \ +crypto/ec/ec_asn1.c \ +crypto/ec/ec_check.c \ +crypto/ec/ec_curve.c \ +crypto/ec/ec_cvt.c \ +crypto/ec/ec_err.c \ +crypto/ec/ec_key.c \ +crypto/ec/ec_lib.c \ +crypto/ec/ec_mult.c \ +crypto/ec/ec_oct.c \ +crypto/ec/ec_pmeth.c \ +crypto/ec/ec_print.c \ +crypto/ec/eck_prn.c \ +crypto/ec/ecp_mont.c \ +crypto/ec/ecp_nist.c \ +crypto/ec/ecp_oct.c \ +crypto/ec/ecp_smpl.c \ +crypto/ecdh/ech_err.c \ +crypto/ecdh/ech_key.c \ +crypto/ecdh/ech_lib.c \ +crypto/ecdh/ech_ossl.c \ +crypto/ecdsa/ecs_asn1.c \ +crypto/ecdsa/ecs_err.c \ +crypto/ecdsa/ecs_lib.c \ +crypto/ecdsa/ecs_ossl.c \ +crypto/ecdsa/ecs_sign.c \ +crypto/ecdsa/ecs_vrf.c \ +crypto/engine/eng_all.c \ +crypto/engine/eng_cnf.c \ +crypto/engine/eng_ctrl.c \ +crypto/engine/eng_dyn.c \ +crypto/engine/eng_err.c \ +crypto/engine/eng_fat.c \ +crypto/engine/eng_init.c \ +crypto/engine/eng_lib.c \ +crypto/engine/eng_list.c \ +crypto/engine/eng_pkey.c \ +crypto/engine/eng_table.c \ +crypto/engine/tb_asnmth.c \ +crypto/engine/tb_cipher.c \ +crypto/engine/tb_dh.c \ +crypto/engine/tb_digest.c \ +crypto/engine/tb_dsa.c \ +crypto/engine/tb_ecdh.c \ +crypto/engine/tb_ecdsa.c \ +crypto/engine/tb_pkmeth.c \ +crypto/engine/tb_rand.c \ +crypto/engine/tb_rsa.c \ +crypto/engine/tb_store.c \ +crypto/err/err.c \ +crypto/err/err_all.c \ +crypto/err/err_prn.c \ +crypto/evp/bio_b64.c \ +crypto/evp/bio_enc.c \ +crypto/evp/bio_md.c \ +crypto/evp/bio_ok.c \ +crypto/evp/c_all.c \ +crypto/evp/c_allc.c \ +crypto/evp/c_alld.c \ +crypto/evp/digest.c \ +crypto/evp/e_aes.c \ +crypto/evp/e_aes_cbc_hmac_sha1.c \ +crypto/evp/e_bf.c \ +crypto/evp/e_des.c \ +crypto/evp/e_des3.c \ +crypto/evp/e_null.c \ +crypto/evp/e_old.c \ +crypto/evp/e_rc2.c \ +crypto/evp/e_rc4.c \ +crypto/evp/e_rc4_hmac_md5.c \ +crypto/evp/e_rc5.c \ +crypto/evp/e_xcbc_d.c \ +crypto/evp/encode.c \ +crypto/evp/evp_acnf.c \ +crypto/evp/evp_cnf.c \ +crypto/evp/evp_enc.c \ +crypto/evp/evp_err.c \ +crypto/evp/evp_key.c \ +crypto/evp/evp_lib.c \ +crypto/evp/evp_pbe.c \ +crypto/evp/evp_pkey.c \ +crypto/evp/m_dss.c \ +crypto/evp/m_dss1.c \ +crypto/evp/m_ecdsa.c \ +crypto/evp/m_md4.c \ +crypto/evp/m_md5.c \ +crypto/evp/m_mdc2.c \ +crypto/evp/m_null.c \ +crypto/evp/m_ripemd.c \ +crypto/evp/m_sha1.c \ +crypto/evp/m_sigver.c \ +crypto/evp/m_wp.c \ +crypto/evp/names.c \ +crypto/evp/p5_crpt.c \ +crypto/evp/p5_crpt2.c \ +crypto/evp/p_dec.c \ +crypto/evp/p_enc.c \ +crypto/evp/p_lib.c \ +crypto/evp/p_open.c \ +crypto/evp/p_seal.c \ +crypto/evp/p_sign.c \ +crypto/evp/p_verify.c \ +crypto/evp/pmeth_fn.c \ +crypto/evp/pmeth_gn.c \ +crypto/evp/pmeth_lib.c \ +crypto/hmac/hm_ameth.c \ +crypto/hmac/hm_pmeth.c \ +crypto/hmac/hmac.c \ +crypto/krb5/krb5_asn.c \ +crypto/lhash/lh_stats.c \ +crypto/lhash/lhash.c \ +crypto/md4/md4_dgst.c \ +crypto/md4/md4_one.c \ +crypto/md5/md5_dgst.c \ +crypto/md5/md5_one.c \ +crypto/modes/cbc128.c \ +crypto/modes/ccm128.c \ +crypto/modes/cfb128.c \ +crypto/modes/ctr128.c \ +crypto/modes/gcm128.c \ +crypto/modes/ofb128.c \ +crypto/modes/xts128.c \ +crypto/o_init.c \ +crypto/objects/o_names.c \ +crypto/objects/obj_dat.c \ +crypto/objects/obj_err.c \ +crypto/objects/obj_lib.c \ +crypto/objects/obj_xref.c \ +crypto/ocsp/ocsp_asn.c \ +crypto/ocsp/ocsp_cl.c \ +crypto/ocsp/ocsp_err.c \ +crypto/ocsp/ocsp_ext.c \ +crypto/ocsp/ocsp_ht.c \ +crypto/ocsp/ocsp_lib.c \ +crypto/ocsp/ocsp_prn.c \ +crypto/ocsp/ocsp_srv.c \ +crypto/ocsp/ocsp_vfy.c \ +crypto/pem/pem_all.c \ +crypto/pem/pem_err.c \ +crypto/pem/pem_info.c \ +crypto/pem/pem_lib.c \ +crypto/pem/pem_oth.c \ +crypto/pem/pem_pk8.c \ +crypto/pem/pem_pkey.c \ +crypto/pem/pem_seal.c \ +crypto/pem/pem_sign.c \ +crypto/pem/pem_x509.c \ +crypto/pem/pem_xaux.c \ +crypto/pem/pvkfmt.c \ +crypto/pkcs12/p12_add.c \ +crypto/pkcs12/p12_asn.c \ +crypto/pkcs12/p12_attr.c \ +crypto/pkcs12/p12_crpt.c \ +crypto/pkcs12/p12_crt.c \ +crypto/pkcs12/p12_decr.c \ +crypto/pkcs12/p12_init.c \ +crypto/pkcs12/p12_key.c \ +crypto/pkcs12/p12_kiss.c \ +crypto/pkcs12/p12_mutl.c \ +crypto/pkcs12/p12_npas.c \ +crypto/pkcs12/p12_p8d.c \ +crypto/pkcs12/p12_p8e.c \ +crypto/pkcs12/p12_utl.c \ +crypto/pkcs12/pk12err.c \ +crypto/pkcs7/pk7_asn1.c \ +crypto/pkcs7/pk7_attr.c \ +crypto/pkcs7/pk7_doit.c \ +crypto/pkcs7/pk7_lib.c \ +crypto/pkcs7/pk7_mime.c \ +crypto/pkcs7/pk7_smime.c \ +crypto/pkcs7/pkcs7err.c \ +crypto/pqueue/pqueue.c \ +crypto/rand/md_rand.c \ +crypto/rand/rand_egd.c \ +crypto/rand/rand_err.c \ +crypto/rand/rand_lib.c \ +crypto/rand/rand_unix.c \ +crypto/rand/rand_win.c \ +crypto/rand/randfile.c \ +crypto/rc2/rc2_cbc.c \ +crypto/rc2/rc2_ecb.c \ +crypto/rc2/rc2_skey.c \ +crypto/rc2/rc2cfb64.c \ +crypto/rc2/rc2ofb64.c \ +crypto/rc4/rc4_enc.c \ +crypto/rc4/rc4_skey.c \ +crypto/rc4/rc4_utl.c \ +crypto/ripemd/rmd_dgst.c \ +crypto/ripemd/rmd_one.c \ +crypto/rsa/rsa_ameth.c \ +crypto/rsa/rsa_asn1.c \ +crypto/rsa/rsa_chk.c \ +crypto/rsa/rsa_crpt.c \ +crypto/rsa/rsa_eay.c \ +crypto/rsa/rsa_err.c \ +crypto/rsa/rsa_gen.c \ +crypto/rsa/rsa_lib.c \ +crypto/rsa/rsa_none.c \ +crypto/rsa/rsa_null.c \ +crypto/rsa/rsa_oaep.c \ +crypto/rsa/rsa_pk1.c \ +crypto/rsa/rsa_pmeth.c \ +crypto/rsa/rsa_prn.c \ +crypto/rsa/rsa_pss.c \ +crypto/rsa/rsa_saos.c \ +crypto/rsa/rsa_sign.c \ +crypto/rsa/rsa_ssl.c \ +crypto/rsa/rsa_x931.c \ +crypto/sha/sha1_one.c \ +crypto/sha/sha1dgst.c \ +crypto/sha/sha256.c \ +crypto/sha/sha512.c \ +crypto/sha/sha_dgst.c \ +crypto/srp/srp_lib.c \ +crypto/srp/srp_vfy.c \ +crypto/stack/stack.c \ +crypto/ts/ts_err.c \ +crypto/txt_db/txt_db.c \ +crypto/ui/ui_compat.c \ +crypto/ui/ui_err.c \ +crypto/ui/ui_lib.c \ crypto/ui/ui_openssl.c \ +crypto/ui/ui_util.c \ +crypto/x509/by_dir.c \ +crypto/x509/by_file.c \ +crypto/x509/x509_att.c \ +crypto/x509/x509_cmp.c \ +crypto/x509/x509_d2.c \ +crypto/x509/x509_def.c \ +crypto/x509/x509_err.c \ +crypto/x509/x509_ext.c \ +crypto/x509/x509_lu.c \ +crypto/x509/x509_obj.c \ +crypto/x509/x509_r2x.c \ +crypto/x509/x509_req.c \ +crypto/x509/x509_set.c \ +crypto/x509/x509_trs.c \ +crypto/x509/x509_txt.c \ +crypto/x509/x509_v3.c \ +crypto/x509/x509_vfy.c \ +crypto/x509/x509_vpm.c \ +crypto/x509/x509cset.c \ +crypto/x509/x509name.c \ +crypto/x509/x509rset.c \ +crypto/x509/x509spki.c \ +crypto/x509/x509type.c \ +crypto/x509/x_all.c \ +crypto/x509v3/pcy_cache.c \ +crypto/x509v3/pcy_data.c \ +crypto/x509v3/pcy_lib.c \ +crypto/x509v3/pcy_map.c \ +crypto/x509v3/pcy_node.c \ +crypto/x509v3/pcy_tree.c \ +crypto/x509v3/v3_akey.c \ +crypto/x509v3/v3_akeya.c \ +crypto/x509v3/v3_alt.c \ +crypto/x509v3/v3_bcons.c \ +crypto/x509v3/v3_bitst.c \ +crypto/x509v3/v3_conf.c \ +crypto/x509v3/v3_cpols.c \ +crypto/x509v3/v3_crld.c \ +crypto/x509v3/v3_enum.c \ +crypto/x509v3/v3_extku.c \ +crypto/x509v3/v3_genn.c \ +crypto/x509v3/v3_ia5.c \ +crypto/x509v3/v3_info.c \ +crypto/x509v3/v3_int.c \ +crypto/x509v3/v3_lib.c \ +crypto/x509v3/v3_ncons.c \ +crypto/x509v3/v3_ocsp.c \ +crypto/x509v3/v3_pci.c \ +crypto/x509v3/v3_pcia.c \ +crypto/x509v3/v3_pcons.c \ +crypto/x509v3/v3_pku.c \ +crypto/x509v3/v3_pmaps.c \ +crypto/x509v3/v3_prn.c \ +crypto/x509v3/v3_purp.c \ +crypto/x509v3/v3_skey.c \ +crypto/x509v3/v3_sxnet.c \ +crypto/x509v3/v3_utl.c \ +crypto/x509v3/v3err.c \ " -OPENSSL_PATCHES_handshake_cutthrough_SOURCES="\ -apps/s_client.c \ -ssl/s3_clnt.c \ -ssl/s3_lib.c \ -ssl/ssl.h \ -ssl/ssl3.h \ -ssl/ssl_lib.c \ -ssl/ssltest.c \ -test/testssl \ +OPENSSL_CRYPTO_SOURCES_arm="\ +crypto/aes/asm/aes-armv4.S \ +crypto/aes/asm/bsaes-armv7.S \ +crypto/armcap.c \ +crypto/armv4cpuid.S \ +crypto/bn/asm/armv4-gf2m.S \ +crypto/bn/asm/armv4-mont.S \ +crypto/modes/asm/ghash-armv4.S \ +crypto/sha/asm/sha1-armv4-large.S \ +crypto/sha/asm/sha256-armv4.S \ +crypto/sha/asm/sha512-armv4.S \ " -OPENSSL_PATCHES_small_records_SOURCES="\ -ssl/d1_pkt.c \ -ssl/s23_srvr.c \ -ssl/s3_both.c \ -ssl/s3_pkt.c \ -ssl/ssl.h \ -ssl/ssl3.h \ -ssl/ssltest.c \ -test/testssl \ +OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm="\ +crypto/aes/aes_core.c \ +crypto/mem_clr.c \ +" + +OPENSSL_CRYPTO_SOURCES_arm64="\ +" + +OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm64="\ +" + +OPENSSL_CRYPTO_SOURCES_mips="\ +crypto/aes/asm/aes-mips.S \ +crypto/bn/asm/bn-mips.S \ +crypto/bn/asm/mips-mont.S \ +crypto/sha/asm/sha1-mips.S \ +crypto/sha/asm/sha256-mips.S \ +" + +OPENSSL_CRYPTO_SOURCES_EXCLUDES_mips="\ +crypto/aes/aes_core.c \ +crypto/bn/bn_asm.c \ +" + +OPENSSL_CRYPTO_SOURCES_x86="\ +crypto/aes/asm/aes-586.S \ +crypto/aes/asm/aesni-x86.S \ +crypto/aes/asm/vpaes-x86.S \ +crypto/bf/asm/bf-586.S \ +crypto/bn/asm/bn-586.S \ +crypto/bn/asm/co-586.S \ +crypto/bn/asm/x86-gf2m.S \ +crypto/bn/asm/x86-mont.S \ +crypto/des/asm/crypt586.S \ +crypto/des/asm/des-586.S \ +crypto/md5/asm/md5-586.S \ +crypto/modes/asm/ghash-x86.S \ +crypto/sha/asm/sha1-586.S \ +crypto/sha/asm/sha256-586.S \ +crypto/sha/asm/sha512-586.S \ +crypto/x86cpuid.S \ +" + +OPENSSL_CRYPTO_SOURCES_EXCLUDES_x86="\ +crypto/aes/aes_core.c \ +crypto/aes/aes_cbc.c \ +crypto/bf/bf_enc.c \ +crypto/bn/bn_asm.c \ +crypto/des/des_enc.c \ +crypto/des/fcrypt_b.c \ +crypto/mem_clr.c \ +" + +OPENSSL_CRYPTO_SOURCES_x86_64="\ +crypto/aes/asm/aes-x86_64.S \ +crypto/aes/asm/aesni-x86_64.S \ +crypto/aes/asm/aesni-sha1-x86_64.S \ +crypto/aes/asm/bsaes-x86_64.S \ +crypto/aes/asm/vpaes-x86_64.S \ +crypto/bn/asm/modexp512-x86_64.S \ +crypto/bn/asm/x86_64-gcc.c \ +crypto/bn/asm/x86_64-gf2m.S \ +crypto/bn/asm/x86_64-mont.S \ +crypto/bn/asm/x86_64-mont5.S \ +crypto/md5/asm/md5-x86_64.S \ +crypto/modes/asm/ghash-x86_64.S \ +crypto/rc4/asm/rc4-md5-x86_64.S \ +crypto/rc4/asm/rc4-x86_64.S \ +crypto/sha/asm/sha1-x86_64.S \ +crypto/sha/asm/sha256-x86_64.S \ +crypto/sha/asm/sha512-x86_64.S \ +crypto/x86_64cpuid.S \ " -OPENSSL_PATCHES_jsse_SOURCES="\ -ssl/ssl.h \ -ssl/d1_clnt.c \ +OPENSSL_CRYPTO_SOURCES_EXCLUDES_x86_64="\ +crypto/aes/aes_cbc.c \ +crypto/aes/aes_core.c \ +crypto/bn/bn_asm.c \ +crypto/mem_clr.c \ +crypto/rc4/rc4_enc.c \ +crypto/rc4/rc4_skey.c \ +" + +OPENSSL_SSL_INCLUDES="\ +. \ +include \ +crypto \ +" + +OPENSSL_SSL_SOURCES="\ +ssl/bio_ssl.c \ +ssl/d1_both.c \ +ssl/d1_enc.c \ +ssl/d1_lib.c \ +ssl/d1_pkt.c \ +ssl/d1_srtp.c \ +ssl/kssl.c \ ssl/s23_clnt.c \ +ssl/s23_lib.c \ +ssl/s23_meth.c \ +ssl/s23_pkt.c \ +ssl/s23_srvr.c \ +ssl/s2_clnt.c \ +ssl/s2_enc.c \ +ssl/s2_lib.c \ +ssl/s2_meth.c \ +ssl/s2_pkt.c \ +ssl/s2_srvr.c \ ssl/s3_both.c \ +ssl/s3_cbc.c \ ssl/s3_clnt.c \ +ssl/s3_enc.c \ +ssl/s3_lib.c \ +ssl/s3_meth.c \ +ssl/s3_pkt.c \ ssl/s3_srvr.c \ +ssl/ssl_algs.c \ +ssl/ssl_asn1.c \ +ssl/ssl_cert.c \ ssl/ssl_ciph.c \ ssl/ssl_err.c \ +ssl/ssl_err2.c \ ssl/ssl_lib.c \ -ssl/ssl_locl.h ssl/ssl_rsa.c \ ssl/ssl_sess.c \ +ssl/ssl_stat.c \ +ssl/ssl_txt.c \ +ssl/t1_clnt.c \ +ssl/t1_enc.c \ +ssl/t1_lib.c \ +ssl/t1_meth.c \ +ssl/t1_reneg.c \ +ssl/t1_srvr.c \ +ssl/tls_srp.c \ " -OPENSSL_PATCHES_npn_SOURCES="\ +OPENSSL_APPS_DEFINES="\ +MONOLITH \ +" + +OPENSSL_APPS_INCLUDES="\ +. \ +include \ +" + +OPENSSL_APPS_SOURCES="\ +apps/app_rand.c \ apps/apps.c \ -apps/apps.h \ +apps/asn1pars.c \ +apps/ca.c \ +apps/ciphers.c \ +apps/cms.c \ +apps/crl.c \ +apps/crl2p7.c \ +apps/dgst.c \ +apps/dh.c \ +apps/dhparam.c \ +apps/dsa.c \ +apps/dsaparam.c \ +apps/ecparam.c \ +apps/ec.c \ +apps/enc.c \ +apps/engine.c \ +apps/errstr.c \ +apps/gendh.c \ +apps/gendsa.c \ +apps/genpkey.c \ +apps/genrsa.c \ +apps/nseq.c \ +apps/ocsp.c \ +apps/openssl.c \ +apps/passwd.c \ +apps/pkcs12.c \ +apps/pkcs7.c \ +apps/pkcs8.c \ +apps/pkey.c \ +apps/pkeyparam.c \ +apps/pkeyutl.c \ +apps/prime.c \ +apps/rand.c \ +apps/req.c \ +apps/rsa.c \ +apps/rsautl.c \ +apps/s_cb.c \ apps/s_client.c \ apps/s_server.c \ -include/openssl/ssl.h \ -include/openssl/ssl3.h \ -include/openssl/tls1.h \ -ssl/s3_both.c \ -ssl/s3_clnt.c \ -ssl/s3_lib.c \ -ssl/s3_pkt.c \ -ssl/s3_srvr.c \ -ssl/ssl.h \ -ssl/ssl3.h \ -ssl/ssl_err.c \ -ssl/ssl_lib.c \ -ssl/ssl_locl.h \ -ssl/t1_lib.c \ -ssl/tls1.h \ +apps/s_socket.c \ +apps/s_time.c \ +apps/sess_id.c \ +apps/smime.c \ +apps/speed.c \ +apps/spkac.c \ +apps/srp.c \ +apps/verify.c \ +apps/version.c \ +apps/x509.c \ " + +OPENSSL_PATCHES="\ +progs.patch \ +handshake_cutthrough.patch \ +jsse.patch \ +channelid.patch \ +eng_dyn_dirs.patch \ +fix_clang_build.patch \ +tls12_digests.patch \ +alpn.patch \ +cbc_record_splitting.patch \ +dsa_nonce.patch \ +ecdhe_psk.patch \ +wincrypt.patch \ +tls_psk_hint.patch \ +arm_asm.patch \ +psk_client_callback_128_byte_id_bug.patch \ +early_ccs.patch \ +" + +source ./openssl.trusty.config diff --git a/app/openssl/openssl.trusty.config b/app/openssl/openssl.trusty.config new file mode 100644 index 00000000..9710ad06 --- /dev/null +++ b/app/openssl/openssl.trusty.config @@ -0,0 +1,278 @@ +CONFIGURE_ARGS_TRUSTY="\ +-DL_ENDIAN \ +linux-generic32::: \ +no-camellia \ +no-capieng \ +no-cast \ +no-cms \ +no-comp \ +no-conf \ +no-des \ +no-dso \ +no-dtls1 \ +no-err \ +no-gost \ +no-gmp \ +no-heartbeats \ +no-idea \ +no-jpake \ +no-krb5 \ +no-locking \ +no-md2 \ +no-md4 \ +no-md5 \ +no-mdc2 \ +no-ocsp \ +no-pem \ +no-pkcs12 \ +no-pqueue \ +no-rc2 \ +no-rc4 \ +no-rc5 \ +no-rc5 \ +no-rdrand \ +no-rfc3779 \ +no-ripemd \ +no-rsax \ +no-sctp \ +no-seed \ +no-sha0 \ +no-srp \ +no-ssl \ +no-static_engine \ +no-store \ +no-threads \ +no-ts \ +no-txt_db \ +no-ui \ +no-whirlpool \ +" + +# Trusty-specific compiler defines for crypto/ library. +# +OPENSSL_CRYPTO_TRUSTY_DEFINES="\ +GETPID_IS_MEANINGLESS \ +NO_WINDOWS_BRAINDEATH \ +" + +OPENSSL_CRYPTO_TRUSTY_DEFINES_arm="\ +OPENSSL_BN_ASM_GF2m \ +OPENSSL_BN_ASM_MONT \ +GHASH_ASM \ +AES_ASM \ +SHA1_ASM \ +SHA256_ASM \ +SHA512_ASM \ +" + +OPENSSL_CRYPTO_TRUSTY_DEFINES_mips="" + +OPENSSL_CRYPTO_TRUSTY_DEFINES_x86="" + +OPENSSL_CRYPTO_TRUSTY_DEFINES_x86_64="" + +OPENSSL_CRYPTO_TRUSTY_INCLUDES="\ +. \ +include \ +crypto \ +crypto/asn1 \ +crypto/evp \ +crypto/modes \ +include \ +include/openssl \ +" + +OPENSSL_CRYPTO_TRUSTY_SOURCES="\ +crypto/aes/aes_cbc.c \ +crypto/aes/aes_misc.c \ +crypto/asn1/a_bitstr.c \ +crypto/asn1/a_d2i_fp.c \ +crypto/asn1/a_int.c \ +crypto/asn1/ameth_lib.c \ +crypto/asn1/a_object.c \ +crypto/asn1/a_octet.c \ +crypto/asn1/asn1_lib.c \ +crypto/asn1/asn_pack.c \ +crypto/asn1/a_type.c \ +crypto/asn1/d2i_pr.c \ +crypto/asn1/f_int.c \ +crypto/asn1/i2d_pr.c \ +crypto/asn1/p8_pkey.c \ +crypto/asn1/tasn_dec.c \ +crypto/asn1/tasn_enc.c \ +crypto/asn1/tasn_fre.c \ +crypto/asn1/tasn_new.c \ +crypto/asn1/tasn_typ.c \ +crypto/asn1/tasn_utl.c \ +crypto/asn1/t_pkey.c \ +crypto/asn1/t_x509.c \ +crypto/asn1/x_algor.c \ +crypto/asn1/x_attrib.c \ +crypto/asn1/x_bignum.c \ +crypto/asn1/x_long.c \ +crypto/asn1/x_pubkey.c \ +crypto/asn1/x_sig.c \ +crypto/bio/bio_lib.c \ +crypto/bio/b_print.c \ +crypto/bio/bss_mem.c \ +crypto/bn/bn_add.c \ +crypto/bn/bn_asm.c \ +crypto/bn/bn_blind.c \ +crypto/bn/bn_ctx.c \ +crypto/bn/bn_div.c \ +crypto/bn/bn_exp2.c \ +crypto/bn/bn_exp.c \ +crypto/bn/bn_gcd.c \ +crypto/bn/bn_gf2m.c \ +crypto/bn/bn_kron.c \ +crypto/bn/bn_lib.c \ +crypto/bn/bn_mod.c \ +crypto/bn/bn_mont.c \ +crypto/bn/bn_mul.c \ +crypto/bn/bn_nist.c \ +crypto/bn/bn_prime.c \ +crypto/bn/bn_print.c \ +crypto/bn/bn_rand.c \ +crypto/bn/bn_recp.c \ +crypto/bn/bn_shift.c \ +crypto/bn/bn_sqr.c \ +crypto/bn/bn_sqrt.c \ +crypto/bn/bn_word.c \ +crypto/buffer/buffer.c \ +crypto/buffer/buf_str.c \ +crypto/cmac/cmac.c \ +crypto/cmac/cm_ameth.c \ +crypto/cmac/cm_pmeth.c \ +Crypto-config.mk \ +crypto/cryptlib.c \ +crypto/dh/dh_ameth.c \ +crypto/dh/dh_asn1.c \ +crypto/dh/dh_check.c \ +crypto/dh/dh_gen.c \ +crypto/dh/dh_key.c \ +crypto/dh/dh_lib.c \ +crypto/dh/dh_pmeth.c \ +crypto/dsa/dsa_ameth.c \ +crypto/dsa/dsa_asn1.c \ +crypto/dsa/dsa_gen.c \ +crypto/dsa/dsa_key.c \ +crypto/dsa/dsa_lib.c \ +crypto/dsa/dsa_ossl.c \ +crypto/dsa/dsa_pmeth.c \ +crypto/dsa/dsa_sign.c \ +crypto/dsa/dsa_vrf.c \ +crypto/ecdh/ech_key.c \ +crypto/ecdh/ech_lib.c \ +crypto/ecdh/ech_ossl.c \ +crypto/ecdsa/ecs_asn1.c \ +crypto/ecdsa/ecs_lib.c \ +crypto/ecdsa/ecs_ossl.c \ +crypto/ecdsa/ecs_sign.c \ +crypto/ecdsa/ecs_vrf.c \ +crypto/ec/ec2_mult.c \ +crypto/ec/ec2_oct.c \ +crypto/ec/ec2_smpl.c \ +crypto/ec/ec_ameth.c \ +crypto/ec/ec_asn1.c \ +crypto/ec/ec_curve.c \ +crypto/ec/ec_cvt.c \ +crypto/ec/ec_key.c \ +crypto/ec/eck_prn.c \ +crypto/ec/ec_lib.c \ +crypto/ec/ec_mult.c \ +crypto/ec/ec_oct.c \ +crypto/ec/ec_pmeth.c \ +crypto/ec/ecp_mont.c \ +crypto/ec/ecp_nist.c \ +crypto/ec/ecp_oct.c \ +crypto/ec/ec_print.c \ +crypto/ec/ecp_smpl.c \ +crypto/engine/eng_init.c \ +crypto/engine/eng_lib.c \ +crypto/engine/eng_table.c \ +crypto/engine/tb_asnmth.c \ +crypto/engine/tb_cipher.c \ +crypto/engine/tb_dh.c \ +crypto/engine/tb_digest.c \ +crypto/engine/tb_dsa.c \ +crypto/engine/tb_ecdh.c \ +crypto/engine/tb_ecdsa.c \ +crypto/engine/tb_pkmeth.c \ +crypto/engine/tb_rand.c \ +crypto/engine/tb_rsa.c \ +crypto/err/err.c \ +crypto/evp/digest.c \ +crypto/evp/e_aes.c \ +crypto/evp/evp_enc.c \ +crypto/evp/evp_lib.c \ +crypto/evp/evp_pkey.c \ +crypto/evp/m_sha1.c \ +crypto/evp/m_sigver.c \ +crypto/evp/names.c \ +crypto/evp/p_lib.c \ +crypto/evp/pmeth_fn.c \ +crypto/evp/pmeth_gn.c \ +crypto/evp/pmeth_lib.c \ +crypto/ex_data.c \ +crypto/hmac/hmac.c \ +crypto/hmac/hm_ameth.c \ +crypto/hmac/hm_pmeth.c \ +crypto/lhash/lhash.c \ +crypto/mem.c \ +crypto/mem_clr.c \ +crypto/mem_dbg.c \ +crypto/modes/cbc128.c \ +crypto/modes/ctr128.c \ +crypto/objects/obj_dat.c \ +crypto/objects/obj_xref.c \ +crypto/objects/o_names.c \ +crypto/pkcs7/pk7_lib.c \ +crypto/rand/md_rand.c \ +crypto/rand/rand_lib.c \ +crypto/rsa/rsa_ameth.c \ +crypto/rsa/rsa_asn1.c \ +crypto/rsa/rsa_chk.c \ +crypto/rsa/rsa_crpt.c \ +crypto/rsa/rsa_eay.c \ +crypto/rsa/rsa_gen.c \ +crypto/rsa/rsa_lib.c \ +crypto/rsa/rsa_none.c \ +crypto/rsa/rsa_oaep.c \ +crypto/rsa/rsa_pk1.c \ +crypto/rsa/rsa_pmeth.c \ +crypto/rsa/rsa_pss.c \ +crypto/rsa/rsa_saos.c \ +crypto/rsa/rsa_sign.c \ +crypto/rsa/rsa_ssl.c \ +crypto/rsa/rsa_x931.c \ +crypto/sha/sha1_one.c \ +crypto/sha/sha1dgst.c \ +crypto/sha/sha256.c \ +crypto/sha/sha512.c \ +crypto/stack/stack.c \ +crypto/x509v3/v3_utl.c \ +crypto/x509/x_all.c \ +" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_arm="\ +crypto/aes/asm/aes-armv4.S \ +crypto/sha/asm/sha1-armv4-large.S \ +crypto/sha/asm/sha256-armv4.S \ +crypto/sha/asm/sha512-armv4.S \ +crypto/bn/asm/armv4-gf2m.S \ +crypto/bn/asm/armv4-mont.S \ +" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_arm="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_mips="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_mips="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_x86="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_x86="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_x86_64="" + +OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_x86_64="" diff --git a/app/openssl/openssl.version b/app/openssl/openssl.version index 721c467b..2e849911 100644 --- a/app/openssl/openssl.version +++ b/app/openssl/openssl.version @@ -1,2 +1 @@ -# also update ThirdPartyProject.prop -OPENSSL_VERSION=1.0.0e +OPENSSL_VERSION=1.0.1g diff --git a/app/openssl/patches/README b/app/openssl/patches/README index 54b6e068..2ff69282 100644 --- a/app/openssl/patches/README +++ b/app/openssl/patches/README @@ -3,18 +3,6 @@ progs.patch: Fixup sources under the apps/ directory that are not built under the android environment. -small_records.patch: - -Reduce OpenSSL memory consumption. -SSL records may be as large as 16K, but are typically < 2K. In -addition, a historic bug in Windows allowed records to be as large -32K. OpenSSL statically allocates read and write buffers (34K and -18K respectively) used for processing records. -With this patch, OpenSSL statically allocates 4K + 4K buffers, with -the option of dynamically growing buffers to 34K + 4K, which is a -saving of 44K per connection for the typical case. - - handshake_cutthrough.patch Enables SSL3+ clients to send application data immediately following the @@ -26,14 +14,51 @@ jsse.patch Support for JSSE implementation based on OpenSSL. -npn.patch +channelid.patch + +Implements TLS Channel ID support as both a client and a server. +See http://tools.ietf.org/html/draft-balfanz-tls-channelid-00. + +eng_dyn_dirs.patch + +Fixes the case of having multiple DIR_ADD commands sent to eng_dyn + +fix_clang_build.patch + +Fixes the Clang based build. + +tls12_digests.patch + +Fixes a bug with handling TLS 1.2 and digest functions for DSA and ECDSA +keys. + +alpn.patch + +This change adds support for ALPN in OpenSSL. ALPN is the IETF +blessed version of NPN and we'll be supporting both ALPN and NPN for +some time yet. + +cbc_record_splitting.patch + +BEAST attack client-side mitigation. Removes 0/n record splitting, adds 1/n-1 +record splitting. Record splitting is disabled by default. + +dsa_nonce.patch + +Adds an option to mix in hash of message and private key into (EC)DSA nonces to +make (EC)DSA more resilient to weaknesses in RNGs used for nonces. The feature +is disabled by default. + +ecdhe_psk.patch -Transport Layer Security (TLS) Next Protocol Negotiation Extension +Adds support for ECDHE Pre-Shared Key (PSK) TLS cipher suites. -sslv3_uninit_padding.patch +tls_psk_hint.patch -This patch sets the padding for SSLv3 block ciphers to zero. +Fixes issues with TLS-PSK identity hint implementation where +per-connection/session and per-context hints were being mixed up. -sha1_armv4_large.patch +psk_client_callback_128_byte_id_bug.patch -This patch eliminates memory stores to addresses below SP. +Fixes the issue where it was impossible to return a 128 byte long PSK identity +(the maximum supported length) from psk_client_callback. diff --git a/app/openssl/patches/apps_Android.mk b/app/openssl/patches/apps_Android.mk deleted file mode 100644 index 20cc5a9d..00000000 --- a/app/openssl/patches/apps_Android.mk +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2006 The Android Open Source Project - -LOCAL_PATH:= $(call my-dir) - -local_src_files:= \ - app_rand.c \ - apps.c \ - asn1pars.c \ - ca.c \ - ciphers.c \ - crl.c \ - crl2p7.c \ - dgst.c \ - dh.c \ - dhparam.c \ - dsa.c \ - dsaparam.c \ - ecparam.c \ - ec.c \ - enc.c \ - engine.c \ - errstr.c \ - gendh.c \ - gendsa.c \ - genpkey.c \ - genrsa.c \ - nseq.c \ - ocsp.c \ - openssl.c \ - passwd.c \ - pkcs12.c \ - pkcs7.c \ - pkcs8.c \ - pkey.c \ - pkeyparam.c \ - pkeyutl.c \ - prime.c \ - rand.c \ - req.c \ - rsa.c \ - rsautl.c \ - s_cb.c \ - s_client.c \ - s_server.c \ - s_socket.c \ - s_time.c \ - sess_id.c \ - smime.c \ - speed.c \ - spkac.c \ - verify.c \ - version.c \ - x509.c - -local_shared_libraries := \ - libssl \ - libcrypto - -local_c_includes := \ - external/openssl \ - external/openssl/include - -local_cflags := -DMONOLITH - -# These flags omit whole features from the commandline "openssl". -# However, portions of these features are actually turned on. -local_cflags += -DOPENSSL_NO_DTLS1 - -include $(CLEAR_VARS) -LOCAL_MODULE:= openssl -LOCAL_MODULE_TAGS := optional -LOCAL_SRC_FILES := $(local_src_files) -LOCAL_SHARED_LIBRARIES := $(local_shared_libraries) -LOCAL_C_INCLUDES := $(local_c_includes) -LOCAL_CFLAGS := $(local_cflags) -include $(LOCAL_PATH)/../android-config.mk -include $(BUILD_EXECUTABLE) - -include $(CLEAR_VARS) -LOCAL_MODULE:= openssl -LOCAL_MODULE_TAGS := optional -LOCAL_SRC_FILES := $(local_src_files) -LOCAL_SHARED_LIBRARIES := $(local_shared_libraries) -LOCAL_C_INCLUDES := $(local_c_includes) -LOCAL_CFLAGS := $(local_cflags) -include $(LOCAL_PATH)/../android-config.mk -include $(BUILD_HOST_EXECUTABLE) diff --git a/app/openssl/patches/crypto_Android.mk b/app/openssl/patches/crypto_Android.mk deleted file mode 100644 index 6f09fa53..00000000 --- a/app/openssl/patches/crypto_Android.mk +++ /dev/null @@ -1,559 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -arm_cflags := -DOPENSSL_BN_ASM_MONT -DAES_ASM -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM -arm_src_files := \ - aes/asm/aes-armv4.s \ - bn/asm/armv4-mont.s \ - sha/asm/sha1-armv4-large.s \ - sha/asm/sha256-armv4.s \ - sha/asm/sha512-armv4.s -non_arm_src_files := aes/aes_core.c - -local_src_files := \ - cryptlib.c \ - mem.c \ - mem_clr.c \ - mem_dbg.c \ - cversion.c \ - ex_data.c \ - cpt_err.c \ - ebcdic.c \ - uid.c \ - o_time.c \ - o_str.c \ - o_dir.c \ - aes/aes_cbc.c \ - aes/aes_cfb.c \ - aes/aes_ctr.c \ - aes/aes_ecb.c \ - aes/aes_misc.c \ - aes/aes_ofb.c \ - aes/aes_wrap.c \ - asn1/a_bitstr.c \ - asn1/a_bool.c \ - asn1/a_bytes.c \ - asn1/a_d2i_fp.c \ - asn1/a_digest.c \ - asn1/a_dup.c \ - asn1/a_enum.c \ - asn1/a_gentm.c \ - asn1/a_i2d_fp.c \ - asn1/a_int.c \ - asn1/a_mbstr.c \ - asn1/a_object.c \ - asn1/a_octet.c \ - asn1/a_print.c \ - asn1/a_set.c \ - asn1/a_sign.c \ - asn1/a_strex.c \ - asn1/a_strnid.c \ - asn1/a_time.c \ - asn1/a_type.c \ - asn1/a_utctm.c \ - asn1/a_utf8.c \ - asn1/a_verify.c \ - asn1/ameth_lib.c \ - asn1/asn1_err.c \ - asn1/asn1_gen.c \ - asn1/asn1_lib.c \ - asn1/asn1_par.c \ - asn1/asn_mime.c \ - asn1/asn_moid.c \ - asn1/asn_pack.c \ - asn1/bio_asn1.c \ - asn1/bio_ndef.c \ - asn1/d2i_pr.c \ - asn1/d2i_pu.c \ - asn1/evp_asn1.c \ - asn1/f_enum.c \ - asn1/f_int.c \ - asn1/f_string.c \ - asn1/i2d_pr.c \ - asn1/i2d_pu.c \ - asn1/n_pkey.c \ - asn1/nsseq.c \ - asn1/p5_pbe.c \ - asn1/p5_pbev2.c \ - asn1/p8_pkey.c \ - asn1/t_bitst.c \ - asn1/t_crl.c \ - asn1/t_pkey.c \ - asn1/t_req.c \ - asn1/t_spki.c \ - asn1/t_x509.c \ - asn1/t_x509a.c \ - asn1/tasn_dec.c \ - asn1/tasn_enc.c \ - asn1/tasn_fre.c \ - asn1/tasn_new.c \ - asn1/tasn_prn.c \ - asn1/tasn_typ.c \ - asn1/tasn_utl.c \ - asn1/x_algor.c \ - asn1/x_attrib.c \ - asn1/x_bignum.c \ - asn1/x_crl.c \ - asn1/x_exten.c \ - asn1/x_info.c \ - asn1/x_long.c \ - asn1/x_name.c \ - asn1/x_nx509.c \ - asn1/x_pkey.c \ - asn1/x_pubkey.c \ - asn1/x_req.c \ - asn1/x_sig.c \ - asn1/x_spki.c \ - asn1/x_val.c \ - asn1/x_x509.c \ - asn1/x_x509a.c \ - bf/bf_cfb64.c \ - bf/bf_ecb.c \ - bf/bf_enc.c \ - bf/bf_ofb64.c \ - bf/bf_skey.c \ - bio/b_dump.c \ - bio/b_print.c \ - bio/b_sock.c \ - bio/bf_buff.c \ - bio/bf_nbio.c \ - bio/bf_null.c \ - bio/bio_cb.c \ - bio/bio_err.c \ - bio/bio_lib.c \ - bio/bss_acpt.c \ - bio/bss_bio.c \ - bio/bss_conn.c \ - bio/bss_dgram.c \ - bio/bss_fd.c \ - bio/bss_file.c \ - bio/bss_log.c \ - bio/bss_mem.c \ - bio/bss_null.c \ - bio/bss_sock.c \ - bn/bn_add.c \ - bn/bn_asm.c \ - bn/bn_blind.c \ - bn/bn_const.c \ - bn/bn_ctx.c \ - bn/bn_div.c \ - bn/bn_err.c \ - bn/bn_exp.c \ - bn/bn_exp2.c \ - bn/bn_gcd.c \ - bn/bn_gf2m.c \ - bn/bn_kron.c \ - bn/bn_lib.c \ - bn/bn_mod.c \ - bn/bn_mont.c \ - bn/bn_mpi.c \ - bn/bn_mul.c \ - bn/bn_nist.c \ - bn/bn_prime.c \ - bn/bn_print.c \ - bn/bn_rand.c \ - bn/bn_recp.c \ - bn/bn_shift.c \ - bn/bn_sqr.c \ - bn/bn_sqrt.c \ - bn/bn_word.c \ - buffer/buf_err.c \ - buffer/buffer.c \ - comp/c_rle.c \ - comp/c_zlib.c \ - comp/comp_err.c \ - comp/comp_lib.c \ - conf/conf_api.c \ - conf/conf_def.c \ - conf/conf_err.c \ - conf/conf_lib.c \ - conf/conf_mall.c \ - conf/conf_mod.c \ - conf/conf_sap.c \ - des/cbc_cksm.c \ - des/cbc_enc.c \ - des/cfb64ede.c \ - des/cfb64enc.c \ - des/cfb_enc.c \ - des/des_enc.c \ - des/des_old.c \ - des/des_old2.c \ - des/ecb3_enc.c \ - des/ecb_enc.c \ - des/ede_cbcm_enc.c \ - des/enc_read.c \ - des/enc_writ.c \ - des/fcrypt.c \ - des/fcrypt_b.c \ - des/ofb64ede.c \ - des/ofb64enc.c \ - des/ofb_enc.c \ - des/pcbc_enc.c \ - des/qud_cksm.c \ - des/rand_key.c \ - des/read2pwd.c \ - des/rpc_enc.c \ - des/set_key.c \ - des/str2key.c \ - des/xcbc_enc.c \ - dh/dh_ameth.c \ - dh/dh_asn1.c \ - dh/dh_check.c \ - dh/dh_depr.c \ - dh/dh_err.c \ - dh/dh_gen.c \ - dh/dh_key.c \ - dh/dh_lib.c \ - dh/dh_pmeth.c \ - dsa/dsa_ameth.c \ - dsa/dsa_asn1.c \ - dsa/dsa_depr.c \ - dsa/dsa_err.c \ - dsa/dsa_gen.c \ - dsa/dsa_key.c \ - dsa/dsa_lib.c \ - dsa/dsa_ossl.c \ - dsa/dsa_pmeth.c \ - dsa/dsa_prn.c \ - dsa/dsa_sign.c \ - dsa/dsa_vrf.c \ - dso/dso_dl.c \ - dso/dso_dlfcn.c \ - dso/dso_err.c \ - dso/dso_lib.c \ - dso/dso_null.c \ - dso/dso_openssl.c \ - ec/ec2_mult.c \ - ec/ec2_smpl.c \ - ec/ec_ameth.c \ - ec/ec_asn1.c \ - ec/ec_check.c \ - ec/ec_curve.c \ - ec/ec_cvt.c \ - ec/ec_err.c \ - ec/ec_key.c \ - ec/ec_lib.c \ - ec/ec_mult.c \ - ec/ec_pmeth.c \ - ec/ec_print.c \ - ec/eck_prn.c \ - ec/ecp_mont.c \ - ec/ecp_nist.c \ - ec/ecp_smpl.c \ - ecdh/ech_err.c \ - ecdh/ech_key.c \ - ecdh/ech_lib.c \ - ecdh/ech_ossl.c \ - ecdsa/ecs_asn1.c \ - ecdsa/ecs_err.c \ - ecdsa/ecs_lib.c \ - ecdsa/ecs_ossl.c \ - ecdsa/ecs_sign.c \ - ecdsa/ecs_vrf.c \ - err/err.c \ - err/err_all.c \ - err/err_prn.c \ - evp/bio_b64.c \ - evp/bio_enc.c \ - evp/bio_md.c \ - evp/bio_ok.c \ - evp/c_all.c \ - evp/c_allc.c \ - evp/c_alld.c \ - evp/digest.c \ - evp/e_aes.c \ - evp/e_bf.c \ - evp/e_des.c \ - evp/e_des3.c \ - evp/e_null.c \ - evp/e_old.c \ - evp/e_rc2.c \ - evp/e_rc4.c \ - evp/e_rc5.c \ - evp/e_xcbc_d.c \ - evp/encode.c \ - evp/evp_acnf.c \ - evp/evp_enc.c \ - evp/evp_err.c \ - evp/evp_key.c \ - evp/evp_lib.c \ - evp/evp_pbe.c \ - evp/evp_pkey.c \ - evp/m_dss.c \ - evp/m_dss1.c \ - evp/m_ecdsa.c \ - evp/m_md4.c \ - evp/m_md5.c \ - evp/m_mdc2.c \ - evp/m_null.c \ - evp/m_ripemd.c \ - evp/m_sha1.c \ - evp/m_sigver.c \ - evp/m_wp.c \ - evp/names.c \ - evp/p5_crpt.c \ - evp/p5_crpt2.c \ - evp/p_dec.c \ - evp/p_enc.c \ - evp/p_lib.c \ - evp/p_open.c \ - evp/p_seal.c \ - evp/p_sign.c \ - evp/p_verify.c \ - evp/pmeth_fn.c \ - evp/pmeth_gn.c \ - evp/pmeth_lib.c \ - hmac/hm_ameth.c \ - hmac/hm_pmeth.c \ - hmac/hmac.c \ - krb5/krb5_asn.c \ - lhash/lh_stats.c \ - lhash/lhash.c \ - md4/md4_dgst.c \ - md4/md4_one.c \ - md5/md5_dgst.c \ - md5/md5_one.c \ - modes/cbc128.c \ - modes/cfb128.c \ - modes/ctr128.c \ - modes/ofb128.c \ - objects/o_names.c \ - objects/obj_dat.c \ - objects/obj_err.c \ - objects/obj_lib.c \ - objects/obj_xref.c \ - ocsp/ocsp_asn.c \ - ocsp/ocsp_cl.c \ - ocsp/ocsp_err.c \ - ocsp/ocsp_ext.c \ - ocsp/ocsp_ht.c \ - ocsp/ocsp_lib.c \ - ocsp/ocsp_prn.c \ - ocsp/ocsp_srv.c \ - ocsp/ocsp_vfy.c \ - pem/pem_all.c \ - pem/pem_err.c \ - pem/pem_info.c \ - pem/pem_lib.c \ - pem/pem_oth.c \ - pem/pem_pk8.c \ - pem/pem_pkey.c \ - pem/pem_seal.c \ - pem/pem_sign.c \ - pem/pem_x509.c \ - pem/pem_xaux.c \ - pem/pvkfmt.c \ - pkcs12/p12_add.c \ - pkcs12/p12_asn.c \ - pkcs12/p12_attr.c \ - pkcs12/p12_crpt.c \ - pkcs12/p12_crt.c \ - pkcs12/p12_decr.c \ - pkcs12/p12_init.c \ - pkcs12/p12_key.c \ - pkcs12/p12_kiss.c \ - pkcs12/p12_mutl.c \ - pkcs12/p12_npas.c \ - pkcs12/p12_p8d.c \ - pkcs12/p12_p8e.c \ - pkcs12/p12_utl.c \ - pkcs12/pk12err.c \ - pkcs7/pk7_asn1.c \ - pkcs7/pk7_attr.c \ - pkcs7/pk7_doit.c \ - pkcs7/pk7_lib.c \ - pkcs7/pk7_mime.c \ - pkcs7/pk7_smime.c \ - pkcs7/pkcs7err.c \ - rand/md_rand.c \ - rand/rand_egd.c \ - rand/rand_err.c \ - rand/rand_lib.c \ - rand/rand_unix.c \ - rand/randfile.c \ - rc2/rc2_cbc.c \ - rc2/rc2_ecb.c \ - rc2/rc2_skey.c \ - rc2/rc2cfb64.c \ - rc2/rc2ofb64.c \ - rc4/rc4_enc.c \ - rc4/rc4_skey.c \ - ripemd/rmd_dgst.c \ - ripemd/rmd_one.c \ - rsa/rsa_ameth.c \ - rsa/rsa_asn1.c \ - rsa/rsa_chk.c \ - rsa/rsa_eay.c \ - rsa/rsa_err.c \ - rsa/rsa_gen.c \ - rsa/rsa_lib.c \ - rsa/rsa_none.c \ - rsa/rsa_null.c \ - rsa/rsa_oaep.c \ - rsa/rsa_pk1.c \ - rsa/rsa_pmeth.c \ - rsa/rsa_prn.c \ - rsa/rsa_pss.c \ - rsa/rsa_saos.c \ - rsa/rsa_sign.c \ - rsa/rsa_ssl.c \ - rsa/rsa_x931.c \ - sha/sha1_one.c \ - sha/sha1dgst.c \ - sha/sha256.c \ - sha/sha512.c \ - sha/sha_dgst.c \ - stack/stack.c \ - ts/ts_err.c \ - txt_db/txt_db.c \ - ui/ui_compat.c \ - ui/ui_err.c \ - ui/ui_lib.c \ - ui/ui_openssl.c \ - ui/ui_util.c \ - x509/by_dir.c \ - x509/by_file.c \ - x509/x509_att.c \ - x509/x509_cmp.c \ - x509/x509_d2.c \ - x509/x509_def.c \ - x509/x509_err.c \ - x509/x509_ext.c \ - x509/x509_lu.c \ - x509/x509_obj.c \ - x509/x509_r2x.c \ - x509/x509_req.c \ - x509/x509_set.c \ - x509/x509_trs.c \ - x509/x509_txt.c \ - x509/x509_v3.c \ - x509/x509_vfy.c \ - x509/x509_vpm.c \ - x509/x509cset.c \ - x509/x509name.c \ - x509/x509rset.c \ - x509/x509spki.c \ - x509/x509type.c \ - x509/x_all.c \ - x509v3/pcy_cache.c \ - x509v3/pcy_data.c \ - x509v3/pcy_lib.c \ - x509v3/pcy_map.c \ - x509v3/pcy_node.c \ - x509v3/pcy_tree.c \ - x509v3/v3_akey.c \ - x509v3/v3_akeya.c \ - x509v3/v3_alt.c \ - x509v3/v3_bcons.c \ - x509v3/v3_bitst.c \ - x509v3/v3_conf.c \ - x509v3/v3_cpols.c \ - x509v3/v3_crld.c \ - x509v3/v3_enum.c \ - x509v3/v3_extku.c \ - x509v3/v3_genn.c \ - x509v3/v3_ia5.c \ - x509v3/v3_info.c \ - x509v3/v3_int.c \ - x509v3/v3_lib.c \ - x509v3/v3_ncons.c \ - x509v3/v3_ocsp.c \ - x509v3/v3_pci.c \ - x509v3/v3_pcia.c \ - x509v3/v3_pcons.c \ - x509v3/v3_pku.c \ - x509v3/v3_pmaps.c \ - x509v3/v3_prn.c \ - x509v3/v3_purp.c \ - x509v3/v3_skey.c \ - x509v3/v3_sxnet.c \ - x509v3/v3_utl.c \ - x509v3/v3err.c - -local_c_includes := \ - external/openssl \ - external/openssl/crypto/asn1 \ - external/openssl/crypto/evp \ - external/openssl/include \ - external/openssl/include/openssl \ - external/zlib - -local_c_flags := -DNO_WINDOWS_BRAINDEATH - -####################################### -# target static library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif - -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -LOCAL_C_INCLUDES += $(local_c_includes) -ifeq ($(TARGET_ARCH),arm) - LOCAL_SRC_FILES += $(arm_src_files) - LOCAL_CFLAGS += $(arm_cflags) -else - LOCAL_SRC_FILES += $(non_arm_src_files) -endif -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto_static -include $(BUILD_STATIC_LIBRARY) - -####################################### -# target shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -# Use the NDK prebuilt libz and libdl. -LOCAL_LDFLAGS += -lz -ldl -else -LOCAL_SHARED_LIBRARIES += libz libdl -endif - -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -LOCAL_C_INCLUDES += $(local_c_includes) -ifeq ($(TARGET_ARCH),arm) - LOCAL_SRC_FILES += $(arm_src_files) - LOCAL_CFLAGS += $(arm_cflags) -else - LOCAL_SRC_FILES += $(non_arm_src_files) -endif -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto -include $(BUILD_SHARED_LIBRARY) - -####################################### -# host shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -DPURIFY -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SRC_FILES += $(non_arm_src_files) -LOCAL_STATIC_LIBRARIES += libz -LOCAL_LDLIBS += -ldl -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto -include $(BUILD_HOST_SHARED_LIBRARY) - -######################################## -# host static library, which is used by some SDK tools. - -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_CFLAGS += $(local_c_flags) -DPURIFY -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SRC_FILES += $(non_arm_src_files) -LOCAL_STATIC_LIBRARIES += libz -LOCAL_LDLIBS += -ldl -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libcrypto_static -include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/app/openssl/patches/ssl_Android.mk b/app/openssl/patches/ssl_Android.mk deleted file mode 100644 index 487aabbc..00000000 --- a/app/openssl/patches/ssl_Android.mk +++ /dev/null @@ -1,98 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -local_c_includes := \ - external/openssl \ - external/openssl/include \ - external/openssl/crypto - -local_src_files:= \ - s2_meth.c \ - s2_srvr.c \ - s2_clnt.c \ - s2_lib.c \ - s2_enc.c \ - s2_pkt.c \ - s3_meth.c \ - s3_srvr.c \ - s3_clnt.c \ - s3_lib.c \ - s3_enc.c \ - s3_pkt.c \ - s3_both.c \ - s23_meth.c \ - s23_srvr.c \ - s23_clnt.c \ - s23_lib.c \ - s23_pkt.c \ - t1_meth.c \ - t1_srvr.c \ - t1_clnt.c \ - t1_lib.c \ - t1_enc.c \ - t1_reneg.c \ - ssl_lib.c \ - ssl_err2.c \ - ssl_cert.c \ - ssl_sess.c \ - ssl_ciph.c \ - ssl_stat.c \ - ssl_rsa.c \ - ssl_asn1.c \ - ssl_txt.c \ - ssl_algs.c \ - bio_ssl.c \ - ssl_err.c \ - kssl.c - -####################################### -# target static library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl_static -include $(BUILD_STATIC_LIBRARY) - -####################################### -# target shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SHARED_LIBRARIES += libcrypto -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl -include $(BUILD_SHARED_LIBRARY) - -####################################### -# host shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SHARED_LIBRARIES += libcrypto -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl -include $(BUILD_HOST_SHARED_LIBRARY) - -####################################### -# ssltest -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk -LOCAL_SRC_FILES:= ssltest.c -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SHARED_LIBRARIES := libssl libcrypto -LOCAL_MODULE:= ssltest -LOCAL_MODULE_TAGS := optional -include $(BUILD_EXECUTABLE) diff --git a/app/openssl/rules.mk b/app/openssl/rules.mk new file mode 100644 index 00000000..252dbbb3 --- /dev/null +++ b/app/openssl/rules.mk @@ -0,0 +1,40 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +MODULE := $(LOCAL_DIR) + +TARGET_ARCH := $(ARCH) +TARGET_2ND_ARCH := $(ARCH) + +# Reset local variables +LOCAL_CFLAGS := +LOCAL_C_INCLUDES := +LOCAL_SRC_FILES_$(TARGET_ARCH) := +LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) := +LOCAL_CFLAGS_$(TARGET_ARCH) := +LOCAL_CFLAGS_$(TARGET_2ND_ARCH) := +LOCAL_ADDITIONAL_DEPENDENCIES := + +# get openssl_cflags +MODULE_SRCDEPS += $(LOCAL_DIR)/build-config-trusty.mk +include $(LOCAL_DIR)/build-config-trusty.mk + +# get target_c_flags, target_c_includes, target_src_files +MODULE_SRCDEPS += $(LOCAL_DIR)/Crypto-config-trusty.mk +include $(LOCAL_DIR)/Crypto-config-trusty.mk + +MODULE_SRCS += $(addprefix $(LOCAL_DIR)/,$(LOCAL_SRC_FILES_$(ARCH))) + +MODULE_CFLAGS += $(LOCAL_CFLAGS) +MODULE_CFLAGS += -Wno-error=implicit-function-declaration + +# Global for other modules which include openssl headers +GLOBAL_DEFINES += OPENSSL_SYS_TRUSTY + +LOCAL_C_INCLUDES := $(patsubst external/openssl/%,%,$(LOCAL_C_INCLUDES)) +GLOBAL_INCLUDES += $(addprefix $(LOCAL_DIR)/,$(LOCAL_C_INCLUDES)) + +MODULE_DEPS := \ + lib/openssl-stubs \ + lib/libc-trusty + +include make/module.mk diff --git a/app/openssl/ssl/Android.mk b/app/openssl/ssl/Android.mk deleted file mode 100644 index 1319d5e1..00000000 --- a/app/openssl/ssl/Android.mk +++ /dev/null @@ -1,78 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -local_c_includes := \ - openssl \ - openssl/include \ - openssl/crypto - -local_src_files:= \ - s2_meth.c \ - s2_srvr.c \ - s2_clnt.c \ - s2_lib.c \ - s2_enc.c \ - s2_pkt.c \ - s3_meth.c \ - s3_srvr.c \ - s3_clnt.c \ - s3_lib.c \ - s3_enc.c \ - s3_pkt.c \ - s3_both.c \ - s23_meth.c \ - s23_srvr.c \ - s23_clnt.c \ - s23_lib.c \ - s23_pkt.c \ - t1_meth.c \ - t1_srvr.c \ - t1_clnt.c \ - t1_lib.c \ - t1_enc.c \ - t1_reneg.c \ - ssl_lib.c \ - ssl_err2.c \ - ssl_cert.c \ - ssl_sess.c \ - ssl_ciph.c \ - ssl_stat.c \ - ssl_rsa.c \ - ssl_asn1.c \ - ssl_txt.c \ - ssl_algs.c \ - bio_ssl.c \ - ssl_err.c \ - kssl.c - -####################################### -# target static library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl_static -include $(BUILD_STATIC_LIBRARY) - -####################################### -# target shared library -include $(CLEAR_VARS) -include $(LOCAL_PATH)/../android-config.mk - -ifneq ($(TARGET_ARCH),x86) -LOCAL_NDK_VERSION := 5 -LOCAL_SDK_VERSION := 9 -endif -LOCAL_SRC_FILES += $(local_src_files) -LOCAL_C_INCLUDES += $(local_c_includes) -LOCAL_SHARED_LIBRARIES += libcrypto -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE:= libssl -include $(BUILD_SHARED_LIBRARY) - - diff --git a/app/openssl/ssl/bio_ssl.c b/app/openssl/ssl/bio_ssl.c index eedac8a3..e9552cae 100644 --- a/app/openssl/ssl/bio_ssl.c +++ b/app/openssl/ssl/bio_ssl.c @@ -538,6 +538,7 @@ err: BIO *BIO_new_ssl_connect(SSL_CTX *ctx) { +#ifndef OPENSSL_NO_SOCK BIO *ret=NULL,*con=NULL,*ssl=NULL; if ((con=BIO_new(BIO_s_connect())) == NULL) @@ -549,6 +550,7 @@ BIO *BIO_new_ssl_connect(SSL_CTX *ctx) return(ret); err: if (con != NULL) BIO_free(con); +#endif return(NULL); } diff --git a/app/openssl/ssl/d1_both.c b/app/openssl/ssl/d1_both.c index 2180c6d4..2e8cf681 100644 --- a/app/openssl/ssl/d1_both.c +++ b/app/openssl/ssl/d1_both.c @@ -158,7 +158,6 @@ static unsigned char bitmask_end_values[] = {0xff, 0x01, 0x03, 0x07, 0x0f, 0x1 /* XDTLS: figure out the right values */ static unsigned int g_probable_mtu[] = {1500 - 28, 512 - 28, 256 - 28}; -static unsigned int dtls1_min_mtu(void); static unsigned int dtls1_guess_mtu(unsigned int curr_mtu); static void dtls1_fix_message_header(SSL *s, unsigned long frag_off, unsigned long frag_len); @@ -215,6 +214,12 @@ dtls1_hm_fragment_new(unsigned long frag_len, int reassembly) static void dtls1_hm_fragment_free(hm_fragment *frag) { + + if (frag->msg_header.is_ccs) + { + EVP_CIPHER_CTX_free(frag->msg_header.saved_retransmit_state.enc_write_ctx); + EVP_MD_CTX_destroy(frag->msg_header.saved_retransmit_state.write_hash); + } if (frag->fragment) OPENSSL_free(frag->fragment); if (frag->reassembly) OPENSSL_free(frag->reassembly); OPENSSL_free(frag); @@ -228,14 +233,14 @@ int dtls1_do_write(SSL *s, int type) unsigned int len, frag_off, mac_size, blocksize; /* AHA! Figure out the MTU, and stick to the right size */ - if ( ! (SSL_get_options(s) & SSL_OP_NO_QUERY_MTU)) + if (s->d1->mtu < dtls1_min_mtu() && !(SSL_get_options(s) & SSL_OP_NO_QUERY_MTU)) { s->d1->mtu = BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_QUERY_MTU, 0, NULL); /* I've seen the kernel return bogus numbers when it doesn't know * (initial write), so just make sure we have a reasonable number */ - if ( s->d1->mtu < dtls1_min_mtu()) + if (s->d1->mtu < dtls1_min_mtu()) { s->d1->mtu = 0; s->d1->mtu = dtls1_guess_mtu(s->d1->mtu); @@ -264,11 +269,10 @@ int dtls1_do_write(SSL *s, int type) return ret; mtu = s->d1->mtu - (DTLS1_HM_HEADER_LENGTH + DTLS1_RT_HEADER_LENGTH); } - - OPENSSL_assert(mtu > 0); /* should have something reasonable now */ - #endif + OPENSSL_assert(s->d1->mtu >= dtls1_min_mtu()); /* should have something reasonable now */ + if ( s->init_off == 0 && type == SSL3_RT_HANDSHAKE) OPENSSL_assert(s->init_num == (int)s->d1->w_msg_hdr.msg_len + DTLS1_HM_HEADER_LENGTH); @@ -315,9 +319,10 @@ int dtls1_do_write(SSL *s, int type) s->init_off -= DTLS1_HM_HEADER_LENGTH; s->init_num += DTLS1_HM_HEADER_LENGTH; - /* write atleast DTLS1_HM_HEADER_LENGTH bytes */ - if ( len <= DTLS1_HM_HEADER_LENGTH) - len += DTLS1_HM_HEADER_LENGTH; + if ( s->init_num > curr_mtu) + len = curr_mtu; + else + len = s->init_num; } dtls1_fix_message_header(s, frag_off, @@ -795,7 +800,13 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) *ok = 0; return i; } - OPENSSL_assert(i == DTLS1_HM_HEADER_LENGTH); + /* Handshake fails if message header is incomplete */ + if (i != DTLS1_HM_HEADER_LENGTH) + { + al=SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT,SSL_R_UNEXPECTED_MESSAGE); + goto f_err; + } /* parse the message fragment header */ dtls1_get_message_header(wire, &msg_hdr); @@ -867,7 +878,12 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) /* XDTLS: an incorrectly formatted fragment should cause the * handshake to fail */ - OPENSSL_assert(i == (int)frag_len); + if (i != (int)frag_len) + { + al=SSL3_AD_ILLEGAL_PARAMETER; + SSLerr(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT,SSL3_AD_ILLEGAL_PARAMETER); + goto f_err; + } *ok = 1; @@ -1075,7 +1091,11 @@ int dtls1_read_failed(SSL *s, int code) return code; } - if ( ! SSL_in_init(s)) /* done, no need to send a retransmit */ +#ifndef OPENSSL_NO_HEARTBEATS + if (!SSL_in_init(s) && !s->tlsext_hb_pending) /* done, no need to send a retransmit */ +#else + if (!SSL_in_init(s)) /* done, no need to send a retransmit */ +#endif { BIO_set_flags(SSL_get_rbio(s), BIO_FLAGS_READ); return code; @@ -1367,7 +1387,7 @@ dtls1_write_message_header(SSL *s, unsigned char *p) return p; } -static unsigned int +unsigned int dtls1_min_mtu(void) { return (g_probable_mtu[(sizeof(g_probable_mtu) / @@ -1408,3 +1428,181 @@ dtls1_get_ccs_header(unsigned char *data, struct ccs_header_st *ccs_hdr) ccs_hdr->type = *(data++); } + +int dtls1_shutdown(SSL *s) + { + int ret; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s)) && + !(s->shutdown & SSL_SENT_SHUTDOWN)) + { + ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s)); + if (ret < 0) return -1; + + if (ret == 0) + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN, 1, NULL); + } +#endif + ret = ssl3_shutdown(s); +#ifndef OPENSSL_NO_SCTP + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN, 0, NULL); +#endif + return ret; + } + +#ifndef OPENSSL_NO_HEARTBEATS +int +dtls1_process_heartbeat(SSL *s) + { + unsigned char *p = &s->s3->rrec.data[0], *pl; + unsigned short hbtype; + unsigned int payload; + unsigned int padding = 16; /* Use minimum padding */ + + if (s->msg_callback) + s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT, + &s->s3->rrec.data[0], s->s3->rrec.length, + s, s->msg_callback_arg); + + /* Read type and payload length first */ + if (1 + 2 + 16 > s->s3->rrec.length) + return 0; /* silently discard */ + hbtype = *p++; + n2s(p, payload); + if (1 + 2 + payload + 16 > s->s3->rrec.length) + return 0; /* silently discard per RFC 6520 sec. 4 */ + pl = p; + + if (hbtype == TLS1_HB_REQUEST) + { + unsigned char *buffer, *bp; + unsigned int write_length = 1 /* heartbeat type */ + + 2 /* heartbeat length */ + + payload + padding; + int r; + + if (write_length > SSL3_RT_MAX_PLAIN_LENGTH) + return 0; + + /* Allocate memory for the response, size is 1 byte + * message type, plus 2 bytes payload length, plus + * payload, plus padding + */ + buffer = OPENSSL_malloc(write_length); + bp = buffer; + + /* Enter response type, length and copy payload */ + *bp++ = TLS1_HB_RESPONSE; + s2n(payload, bp); + memcpy(bp, pl, payload); + bp += payload; + /* Random padding */ + RAND_pseudo_bytes(bp, padding); + + r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, write_length); + + if (r >= 0 && s->msg_callback) + s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT, + buffer, write_length, + s, s->msg_callback_arg); + + OPENSSL_free(buffer); + + if (r < 0) + return r; + } + else if (hbtype == TLS1_HB_RESPONSE) + { + unsigned int seq; + + /* We only send sequence numbers (2 bytes unsigned int), + * and 16 random bytes, so we just try to read the + * sequence number */ + n2s(pl, seq); + + if (payload == 18 && seq == s->tlsext_hb_seq) + { + dtls1_stop_timer(s); + s->tlsext_hb_seq++; + s->tlsext_hb_pending = 0; + } + } + + return 0; + } + +int +dtls1_heartbeat(SSL *s) + { + unsigned char *buf, *p; + int ret; + unsigned int payload = 18; /* Sequence number + random bytes */ + unsigned int padding = 16; /* Use minimum padding */ + + /* Only send if peer supports and accepts HB requests... */ + if (!(s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) || + s->tlsext_heartbeat & SSL_TLSEXT_HB_DONT_SEND_REQUESTS) + { + SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT); + return -1; + } + + /* ...and there is none in flight yet... */ + if (s->tlsext_hb_pending) + { + SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PENDING); + return -1; + } + + /* ...and no handshake in progress. */ + if (SSL_in_init(s) || s->in_handshake) + { + SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_UNEXPECTED_MESSAGE); + return -1; + } + + /* Check if padding is too long, payload and padding + * must not exceed 2^14 - 3 = 16381 bytes in total. + */ + OPENSSL_assert(payload + padding <= 16381); + + /* Create HeartBeat message, we just use a sequence number + * as payload to distuingish different messages and add + * some random stuff. + * - Message Type, 1 byte + * - Payload Length, 2 bytes (unsigned int) + * - Payload, the sequence number (2 bytes uint) + * - Payload, random bytes (16 bytes uint) + * - Padding + */ + buf = OPENSSL_malloc(1 + 2 + payload + padding); + p = buf; + /* Message Type */ + *p++ = TLS1_HB_REQUEST; + /* Payload length (18 bytes here) */ + s2n(payload, p); + /* Sequence number */ + s2n(s->tlsext_hb_seq, p); + /* 16 random bytes */ + RAND_pseudo_bytes(p, 16); + p += 16; + /* Random padding */ + RAND_pseudo_bytes(p, padding); + + ret = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding); + if (ret >= 0) + { + if (s->msg_callback) + s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT, + buf, 3 + payload + padding, + s, s->msg_callback_arg); + + dtls1_start_timer(s); + s->tlsext_hb_pending = 1; + } + + OPENSSL_free(buf); + + return ret; + } +#endif diff --git a/app/openssl/ssl/d1_clnt.c b/app/openssl/ssl/d1_clnt.c index 57766717..5ee8f58e 100644 --- a/app/openssl/ssl/d1_clnt.c +++ b/app/openssl/ssl/d1_clnt.c @@ -150,7 +150,11 @@ int dtls1_connect(SSL *s) unsigned long Time=(unsigned long)time(NULL); void (*cb)(const SSL *ssl,int type,int val)=NULL; int ret= -1; - int new_state,state,skip=0;; + int new_state,state,skip=0; +#ifndef OPENSSL_NO_SCTP + unsigned char sctpauthkey[64]; + char labelbuffer[sizeof(DTLS1_SCTP_AUTH_LABEL)]; +#endif RAND_add(&Time,sizeof(Time),0); ERR_clear_error(); @@ -164,6 +168,27 @@ int dtls1_connect(SSL *s) s->in_handshake++; if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s); +#ifndef OPENSSL_NO_SCTP + /* Notify SCTP BIO socket to enter handshake + * mode and prevent stream identifier other + * than 0. Will be ignored if no SCTP is used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL); +#endif + +#ifndef OPENSSL_NO_HEARTBEATS + /* If we're awaiting a HeartbeatResponse, pretend we + * already got and don't await it anymore, because + * Heartbeats don't make sense during handshakes anyway. + */ + if (s->tlsext_hb_pending) + { + dtls1_stop_timer(s); + s->tlsext_hb_pending = 0; + s->tlsext_hb_seq++; + } +#endif + for (;;) { state=s->state; @@ -171,7 +196,7 @@ int dtls1_connect(SSL *s) switch(s->state) { case SSL_ST_RENEGOTIATE: - s->new_session=1; + s->renegotiate=1; s->state=SSL_ST_CONNECT; s->ctx->stats.sess_connect_renegotiate++; /* break */ @@ -226,6 +251,42 @@ int dtls1_connect(SSL *s) s->hit = 0; break; +#ifndef OPENSSL_NO_SCTP + case DTLS1_SCTP_ST_CR_READ_SOCK: + + if (BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s))) + { + s->s3->in_read_app_data=2; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + ret = -1; + goto end; + } + + s->state=s->s3->tmp.next_state; + break; + + case DTLS1_SCTP_ST_CW_WRITE_SOCK: + /* read app data until dry event */ + + ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s)); + if (ret < 0) goto end; + + if (ret == 0) + { + s->s3->in_read_app_data=2; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + ret = -1; + goto end; + } + + s->state=s->d1->next_state; + break; +#endif + case SSL3_ST_CW_CLNT_HELLO_A: case SSL3_ST_CW_CLNT_HELLO_B: @@ -248,9 +309,17 @@ int dtls1_connect(SSL *s) s->init_num=0; - /* turn on buffering for the next lot of output */ - if (s->bbio != s->wbio) - s->wbio=BIO_push(s->bbio,s->wbio); +#ifndef OPENSSL_NO_SCTP + /* Disable buffering for SCTP */ + if (!BIO_dgram_is_sctp(SSL_get_wbio(s))) + { +#endif + /* turn on buffering for the next lot of output */ + if (s->bbio != s->wbio) + s->wbio=BIO_push(s->bbio,s->wbio); +#ifndef OPENSSL_NO_SCTP + } +#endif break; @@ -260,9 +329,25 @@ int dtls1_connect(SSL *s) if (ret <= 0) goto end; else { - dtls1_stop_timer(s); if (s->hit) + { +#ifndef OPENSSL_NO_SCTP + /* Add new shared key for SCTP-Auth, + * will be ignored if no SCTP used. + */ + snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL), + DTLS1_SCTP_AUTH_LABEL); + + SSL_export_keying_material(s, sctpauthkey, + sizeof(sctpauthkey), labelbuffer, + sizeof(labelbuffer), NULL, 0, 0); + + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY, + sizeof(sctpauthkey), sctpauthkey); +#endif + s->state=SSL3_ST_CR_FINISHED_A; + } else s->state=DTLS1_ST_CR_HELLO_VERIFY_REQUEST_A; } @@ -354,12 +439,20 @@ int dtls1_connect(SSL *s) case SSL3_ST_CR_SRVR_DONE_B: ret=ssl3_get_server_done(s); if (ret <= 0) goto end; + dtls1_stop_timer(s); if (s->s3->tmp.cert_req) - s->state=SSL3_ST_CW_CERT_A; + s->s3->tmp.next_state=SSL3_ST_CW_CERT_A; else - s->state=SSL3_ST_CW_KEY_EXCH_A; + s->s3->tmp.next_state=SSL3_ST_CW_KEY_EXCH_A; s->init_num=0; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s)) && + state == SSL_ST_RENEGOTIATE) + s->state=DTLS1_SCTP_ST_CR_READ_SOCK; + else +#endif + s->state=s->s3->tmp.next_state; break; case SSL3_ST_CW_CERT_A: @@ -378,6 +471,22 @@ int dtls1_connect(SSL *s) dtls1_start_timer(s); ret=dtls1_send_client_key_exchange(s); if (ret <= 0) goto end; + +#ifndef OPENSSL_NO_SCTP + /* Add new shared key for SCTP-Auth, + * will be ignored if no SCTP used. + */ + snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL), + DTLS1_SCTP_AUTH_LABEL); + + SSL_export_keying_material(s, sctpauthkey, + sizeof(sctpauthkey), labelbuffer, + sizeof(labelbuffer), NULL, 0, 0); + + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY, + sizeof(sctpauthkey), sctpauthkey); +#endif + /* EAY EAY EAY need to check for DH fix cert * sent back */ /* For TLS, cert_req is set to 2, so a cert chain @@ -388,7 +497,15 @@ int dtls1_connect(SSL *s) } else { - s->state=SSL3_ST_CW_CHANGE_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state=SSL3_ST_CW_CHANGE_A; + s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK; + } + else +#endif + s->state=SSL3_ST_CW_CHANGE_A; s->s3->change_cipher_spec=0; } @@ -400,7 +517,15 @@ int dtls1_connect(SSL *s) dtls1_start_timer(s); ret=dtls1_send_client_verify(s); if (ret <= 0) goto end; - s->state=SSL3_ST_CW_CHANGE_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state=SSL3_ST_CW_CHANGE_A; + s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK; + } + else +#endif + s->state=SSL3_ST_CW_CHANGE_A; s->init_num=0; s->s3->change_cipher_spec=0; break; @@ -412,6 +537,7 @@ int dtls1_connect(SSL *s) ret=dtls1_send_change_cipher_spec(s, SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B); if (ret <= 0) goto end; + s->state=SSL3_ST_CW_FINISHED_A; s->init_num=0; @@ -438,6 +564,16 @@ int dtls1_connect(SSL *s) goto end; } +#ifndef OPENSSL_NO_SCTP + if (s->hit) + { + /* Change to new shared key of SCTP-Auth, + * will be ignored if no SCTP used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL); + } +#endif + dtls1_reset_seq_numbers(s, SSL3_CC_WRITE); break; @@ -457,15 +593,36 @@ int dtls1_connect(SSL *s) if (s->hit) { s->s3->tmp.next_state=SSL_ST_OK; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = s->s3->tmp.next_state; + s->s3->tmp.next_state=DTLS1_SCTP_ST_CW_WRITE_SOCK; + } +#endif if (s->s3->flags & SSL3_FLAGS_DELAY_CLIENT_FINISHED) { s->state=SSL_ST_OK; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = SSL_ST_OK; + s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK; + } +#endif s->s3->flags|=SSL3_FLAGS_POP_BUFFER; s->s3->delay_buf_pop_ret=0; } } else { +#ifndef OPENSSL_NO_SCTP + /* Change to new shared key of SCTP-Auth, + * will be ignored if no SCTP used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL); +#endif + #ifndef OPENSSL_NO_TLSEXT /* Allow NewSessionTicket if ticket expected */ if (s->tlsext_ticket_expected) @@ -508,6 +665,16 @@ int dtls1_connect(SSL *s) s->state=SSL3_ST_CW_CHANGE_A; else s->state=SSL_ST_OK; + +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s)) && + state == SSL_ST_RENEGOTIATE) + { + s->d1->next_state=s->state; + s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK; + } +#endif + s->init_num=0; break; @@ -515,6 +682,13 @@ int dtls1_connect(SSL *s) s->rwstate=SSL_WRITING; if (BIO_flush(s->wbio) <= 0) { + /* If the write error was fatal, stop trying */ + if (!BIO_should_retry(s->wbio)) + { + s->rwstate=SSL_NOTHING; + s->state=s->s3->tmp.next_state; + } + ret= -1; goto end; } @@ -541,6 +715,7 @@ int dtls1_connect(SSL *s) /* else do it later in ssl3_write */ s->init_num=0; + s->renegotiate=0; s->new_session=0; ssl_update_cache(s,SSL_SESS_CACHE_CLIENT); @@ -587,6 +762,15 @@ int dtls1_connect(SSL *s) } end: s->in_handshake--; + +#ifndef OPENSSL_NO_SCTP + /* Notify SCTP BIO socket to leave handshake + * mode and allow stream identifier other + * than 0. Will be ignored if no SCTP is used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL); +#endif + if (buf != NULL) BUF_MEM_free(buf); if (cb != NULL) @@ -599,7 +783,7 @@ int dtls1_client_hello(SSL *s) unsigned char *buf; unsigned char *p,*d; unsigned int i,j; - unsigned long Time,l; + unsigned long l; SSL_COMP *comp; buf=(unsigned char *)s->init_buf->data; @@ -630,13 +814,11 @@ int dtls1_client_hello(SSL *s) /* if client_random is initialized, reuse it, we are * required to use same upon reply to HelloVerify */ - for (i=0;p[i]=='\0' && is3->client_random);i++) ; + for (i=0;p[i]=='\0' && is3->client_random);i++) + ; if (i==sizeof(s->s3->client_random)) - { - Time=(unsigned long)time(NULL); /* Time */ - l2n(Time,p); - RAND_pseudo_bytes(p,sizeof(s->s3->client_random)-4); - } + ssl_fill_hello_random(s, 0, p, + sizeof(s->s3->client_random)); /* Do the message type and length last */ d=p= &(buf[DTLS1_HM_HEADER_LENGTH]); @@ -1258,7 +1440,7 @@ int dtls1_send_client_key_exchange(SSL *s) goto err; } - psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint, + psk_len = s->psk_client_callback(s, s->session->psk_identity_hint, identity, PSK_MAX_IDENTITY_LEN, psk_or_pre_ms, sizeof(psk_or_pre_ms)); if (psk_len > PSK_MAX_PSK_LEN) @@ -1283,17 +1465,6 @@ int dtls1_send_client_key_exchange(SSL *s) t+=psk_len; s2n(psk_len, t); - if (s->session->psk_identity_hint != NULL) - OPENSSL_free(s->session->psk_identity_hint); - s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint); - if (s->ctx->psk_identity_hint != NULL && - s->session->psk_identity_hint == NULL) - { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; - } - if (s->session->psk_identity != NULL) OPENSSL_free(s->session->psk_identity); s->session->psk_identity = BUF_strdup(identity); diff --git a/app/openssl/ssl/d1_enc.c b/app/openssl/ssl/d1_enc.c index becbab91..712c4647 100644 --- a/app/openssl/ssl/d1_enc.c +++ b/app/openssl/ssl/d1_enc.c @@ -126,20 +126,28 @@ #include #endif +/* dtls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively. + * + * Returns: + * 0: (in non-constant time) if the record is publically invalid (i.e. too + * short etc). + * 1: if the record's padding is valid / the encryption was successful. + * -1: if the record's padding/AEAD-authenticator is invalid or, if sending, + * an internal error occured. */ int dtls1_enc(SSL *s, int send) { SSL3_RECORD *rec; EVP_CIPHER_CTX *ds; unsigned long l; - int bs,i,ii,j,k,n=0; + int bs,i,j,k,mac_size=0; const EVP_CIPHER *enc; if (send) { if (EVP_MD_CTX_md(s->write_hash)) { - n=EVP_MD_CTX_size(s->write_hash); - if (n < 0) + mac_size=EVP_MD_CTX_size(s->write_hash); + if (mac_size < 0) return -1; } ds=s->enc_write_ctx; @@ -164,9 +172,8 @@ int dtls1_enc(SSL *s, int send) { if (EVP_MD_CTX_md(s->read_hash)) { - n=EVP_MD_CTX_size(s->read_hash); - if (n < 0) - return -1; + mac_size=EVP_MD_CTX_size(s->read_hash); + OPENSSL_assert(mac_size >= 0); } ds=s->enc_read_ctx; rec= &(s->s3->rrec); @@ -231,7 +238,7 @@ int dtls1_enc(SSL *s, int send) if (!send) { if (l == 0 || l%bs != 0) - return -1; + return 0; } EVP_Cipher(ds,rec->data,rec->input,l); @@ -246,43 +253,7 @@ int dtls1_enc(SSL *s, int send) #endif /* KSSL_DEBUG */ if ((bs != 1) && !send) - { - ii=i=rec->data[l-1]; /* padding_length */ - i++; - if (s->options&SSL_OP_TLS_BLOCK_PADDING_BUG) - { - /* First packet is even in size, so check */ - if ((memcmp(s->s3->read_sequence, - "\0\0\0\0\0\0\0\0",8) == 0) && !(ii & 1)) - s->s3->flags|=TLS1_FLAGS_TLS_PADDING_BUG; - if (s->s3->flags & TLS1_FLAGS_TLS_PADDING_BUG) - i--; - } - /* TLS 1.0 does not bound the number of padding bytes by the block size. - * All of them must have value 'padding_length'. */ - if (i > (int)rec->length) - { - /* Incorrect padding. SSLerr() and ssl3_alert are done - * by caller: we don't want to reveal whether this is - * a decryption error or a MAC verification failure - * (see http://www.openssl.org/~bodo/tls-cbc.txt) - */ - return -1; - } - for (j=(int)(l-i); j<(int)l; j++) - { - if (rec->data[j] != ii) - { - /* Incorrect padding */ - return -1; - } - } - rec->length-=i; - - rec->data += bs; /* skip the implicit IV */ - rec->input += bs; - rec->length -= bs; - } + return tls1_cbc_remove_padding(s, rec, bs, mac_size); } return(1); } diff --git a/app/openssl/ssl/d1_lib.c b/app/openssl/ssl/d1_lib.c index 48e8b6ff..106939f2 100644 --- a/app/openssl/ssl/d1_lib.c +++ b/app/openssl/ssl/d1_lib.c @@ -82,6 +82,7 @@ SSL3_ENC_METHOD DTLSv1_enc_data={ TLS_MD_CLIENT_FINISH_CONST,TLS_MD_CLIENT_FINISH_CONST_SIZE, TLS_MD_SERVER_FINISH_CONST,TLS_MD_SERVER_FINISH_CONST_SIZE, tls1_alert_code, + tls1_export_keying_material, }; long dtls1_default_timeout(void) @@ -195,6 +196,7 @@ void dtls1_free(SSL *s) pqueue_free(s->d1->buffered_app_data.q); OPENSSL_free(s->d1); + s->d1 = NULL; } void dtls1_clear(SSL *s) @@ -204,7 +206,8 @@ void dtls1_clear(SSL *s) pqueue buffered_messages; pqueue sent_messages; pqueue buffered_app_data; - + unsigned int mtu; + if (s->d1) { unprocessed_rcds = s->d1->unprocessed_rcds.q; @@ -212,6 +215,7 @@ void dtls1_clear(SSL *s) buffered_messages = s->d1->buffered_messages; sent_messages = s->d1->sent_messages; buffered_app_data = s->d1->buffered_app_data.q; + mtu = s->d1->mtu; dtls1_clear_queues(s); @@ -222,6 +226,11 @@ void dtls1_clear(SSL *s) s->d1->cookie_len = sizeof(s->d1->cookie); } + if (SSL_get_options(s) & SSL_OP_NO_QUERY_MTU) + { + s->d1->mtu = mtu; + } + s->d1->unprocessed_rcds.q = unprocessed_rcds; s->d1->processed_rcds.q = processed_rcds; s->d1->buffered_messages = buffered_messages; @@ -284,6 +293,15 @@ const SSL_CIPHER *dtls1_get_cipher(unsigned int u) void dtls1_start_timer(SSL *s) { +#ifndef OPENSSL_NO_SCTP + /* Disable timer for SCTP */ + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + memset(&(s->d1->next_timeout), 0, sizeof(struct timeval)); + return; + } +#endif + /* If timer is not set, initialize duration with 1 second */ if (s->d1->next_timeout.tv_sec == 0 && s->d1->next_timeout.tv_usec == 0) { @@ -374,6 +392,7 @@ void dtls1_double_timeout(SSL *s) void dtls1_stop_timer(SSL *s) { /* Reset everything */ + memset(&(s->d1->timeout), 0, sizeof(struct dtls1_timeout_st)); memset(&(s->d1->next_timeout), 0, sizeof(struct timeval)); s->d1->timeout_duration = 1; BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT, 0, &(s->d1->next_timeout)); @@ -381,10 +400,28 @@ void dtls1_stop_timer(SSL *s) dtls1_clear_record_buffer(s); } -int dtls1_handle_timeout(SSL *s) +int dtls1_check_timeout_num(SSL *s) { - DTLS1_STATE *state; + s->d1->timeout.num_alerts++; + + /* Reduce MTU after 2 unsuccessful retransmissions */ + if (s->d1->timeout.num_alerts > 2) + { + s->d1->mtu = BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_GET_FALLBACK_MTU, 0, NULL); + } + + if (s->d1->timeout.num_alerts > DTLS1_TMO_ALERT_COUNT) + { + /* fail the connection, enough alerts have been sent */ + SSLerr(SSL_F_DTLS1_CHECK_TIMEOUT_NUM,SSL_R_READ_TIMEOUT_EXPIRED); + return -1; + } + return 0; + } + +int dtls1_handle_timeout(SSL *s) + { /* if no timer is expired, don't do anything */ if (!dtls1_is_timer_expired(s)) { @@ -392,20 +429,23 @@ int dtls1_handle_timeout(SSL *s) } dtls1_double_timeout(s); - state = s->d1; - state->timeout.num_alerts++; - if ( state->timeout.num_alerts > DTLS1_TMO_ALERT_COUNT) - { - /* fail the connection, enough alerts have been sent */ - SSLerr(SSL_F_DTLS1_HANDLE_TIMEOUT,SSL_R_READ_TIMEOUT_EXPIRED); + + if (dtls1_check_timeout_num(s) < 0) return -1; + + s->d1->timeout.read_timeouts++; + if (s->d1->timeout.read_timeouts > DTLS1_TMO_READ_COUNT) + { + s->d1->timeout.read_timeouts = 1; } - state->timeout.read_timeouts++; - if ( state->timeout.read_timeouts > DTLS1_TMO_READ_COUNT) +#ifndef OPENSSL_NO_HEARTBEATS + if (s->tlsext_hb_pending) { - state->timeout.read_timeouts = 1; + s->tlsext_hb_pending = 0; + return dtls1_heartbeat(s); } +#endif dtls1_start_timer(s); return dtls1_retransmit_buffered_messages(s); diff --git a/app/openssl/ssl/d1_pkt.c b/app/openssl/ssl/d1_pkt.c index 91562f35..5b84e97c 100644 --- a/app/openssl/ssl/d1_pkt.c +++ b/app/openssl/ssl/d1_pkt.c @@ -179,7 +179,8 @@ static int dtls1_record_needs_buffering(SSL *s, SSL3_RECORD *rr, static int dtls1_buffer_record(SSL *s, record_pqueue *q, unsigned char *priority); static int dtls1_process_record(SSL *s); -static void dtls1_clear_timeouts(SSL *s); +static int do_dtls1_write(SSL *s, int type, const unsigned char *buf, + unsigned int len); /* copy buffered record into SSL structure */ static int @@ -232,6 +233,14 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority) item->data = rdata; +#ifndef OPENSSL_NO_SCTP + /* Store bio_dgram_sctp_rcvinfo struct */ + if (BIO_dgram_is_sctp(SSL_get_rbio(s)) && + (s->state == SSL3_ST_SR_FINISHED_A || s->state == SSL3_ST_CR_FINISHED_A)) { + BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SCTP_GET_RCVINFO, sizeof(rdata->recordinfo), &rdata->recordinfo); + } +#endif + /* insert should not fail, since duplicates are dropped */ if (pqueue_insert(queue->q, item) == NULL) { @@ -369,14 +378,12 @@ static int dtls1_process_record(SSL *s) { int i,al; - int clear=0; int enc_err; SSL_SESSION *sess; SSL3_RECORD *rr; - unsigned int mac_size; + unsigned int mac_size, orig_len; unsigned char md[EVP_MAX_MD_SIZE]; - rr= &(s->s3->rrec); sess = s->session; @@ -407,14 +414,15 @@ dtls1_process_record(SSL *s) rr->data=rr->input; enc_err = s->method->ssl3_enc->enc(s,0); - if (enc_err <= 0) + /* enc_err is: + * 0: (in non-constant time) if the record is publically invalid. + * 1: if the padding is valid + * -1: if the padding is invalid */ + if (enc_err == 0) { - /* decryption failed, silently discard message */ - if (enc_err < 0) - { - rr->length = 0; - s->packet_length = 0; - } + /* For DTLS we simply ignore bad packets. */ + rr->length = 0; + s->packet_length = 0; goto err; } @@ -425,46 +433,67 @@ printf("\n"); #endif /* r->length is now the compressed data plus mac */ - if ( (sess == NULL) || - (s->enc_read_ctx == NULL) || - (s->read_hash == NULL)) - clear=1; - - if (!clear) + if ((sess != NULL) && + (s->enc_read_ctx != NULL) && + (EVP_MD_CTX_md(s->read_hash) != NULL)) { - /* !clear => s->read_hash != NULL => mac_size != -1 */ - int t; - t=EVP_MD_CTX_size(s->read_hash); - OPENSSL_assert(t >= 0); - mac_size=t; - - if (rr->length > SSL3_RT_MAX_COMPRESSED_LENGTH+mac_size) - { -#if 0 /* OK only for stream ciphers (then rr->length is visible from ciphertext anyway) */ - al=SSL_AD_RECORD_OVERFLOW; - SSLerr(SSL_F_DTLS1_PROCESS_RECORD,SSL_R_PRE_MAC_LENGTH_TOO_LONG); - goto f_err; -#else - goto err; -#endif - } - /* check the MAC for rr->input (it's in mac_size bytes at the tail) */ - if (rr->length < mac_size) + /* s->read_hash != NULL => mac_size != -1 */ + unsigned char *mac = NULL; + unsigned char mac_tmp[EVP_MAX_MD_SIZE]; + mac_size=EVP_MD_CTX_size(s->read_hash); + OPENSSL_assert(mac_size <= EVP_MAX_MD_SIZE); + + /* kludge: *_cbc_remove_padding passes padding length in rr->type */ + orig_len = rr->length+((unsigned int)rr->type>>8); + + /* orig_len is the length of the record before any padding was + * removed. This is public information, as is the MAC in use, + * therefore we can safely process the record in a different + * amount of time if it's too short to possibly contain a MAC. + */ + if (orig_len < mac_size || + /* CBC records must have a padding length byte too. */ + (EVP_CIPHER_CTX_mode(s->enc_read_ctx) == EVP_CIPH_CBC_MODE && + orig_len < mac_size+1)) { -#if 0 /* OK only for stream ciphers */ al=SSL_AD_DECODE_ERROR; SSLerr(SSL_F_DTLS1_PROCESS_RECORD,SSL_R_LENGTH_TOO_SHORT); goto f_err; -#else - goto err; -#endif } - rr->length-=mac_size; - i=s->method->ssl3_enc->mac(s,md,0); - if (i < 0 || memcmp(md,&(rr->data[rr->length]),mac_size) != 0) + + if (EVP_CIPHER_CTX_mode(s->enc_read_ctx) == EVP_CIPH_CBC_MODE) { - goto err; + /* We update the length so that the TLS header bytes + * can be constructed correctly but we need to extract + * the MAC in constant time from within the record, + * without leaking the contents of the padding bytes. + * */ + mac = mac_tmp; + ssl3_cbc_copy_mac(mac_tmp, rr, mac_size, orig_len); + rr->length -= mac_size; + } + else + { + /* In this case there's no padding, so |orig_len| + * equals |rec->length| and we checked that there's + * enough bytes for |mac_size| above. */ + rr->length -= mac_size; + mac = &rr->data[rr->length]; } + + i=s->method->ssl3_enc->mac(s,md,0 /* not send */); + if (i < 0 || mac == NULL || CRYPTO_memcmp(md, mac, (size_t)mac_size) != 0) + enc_err = -1; + if (rr->length > SSL3_RT_MAX_COMPRESSED_LENGTH+mac_size) + enc_err = -1; + } + + if (enc_err < 0) + { + /* decryption failed, silently discard message */ + rr->length = 0; + s->packet_length = 0; + goto err; } /* r->length is now just compressed */ @@ -604,24 +633,6 @@ again: goto again; } - /* If we receive a valid record larger than the current buffer size, - * allocate some memory for it. - */ - if (rr->length > s->s3->rbuf.len - DTLS1_RT_HEADER_LENGTH) - { - unsigned char *pp; - unsigned int newlen = rr->length + DTLS1_RT_HEADER_LENGTH; - if ((pp=OPENSSL_realloc(s->s3->rbuf.buf, newlen))==NULL) - { - SSLerr(SSL_F_DTLS1_GET_RECORD,ERR_R_MALLOC_FAILURE); - return(-1); - } - p = pp + (p - s->s3->rbuf.buf); - s->s3->rbuf.buf=pp; - s->s3->rbuf.len=newlen; - s->packet= &(s->s3->rbuf.buf[0]); - } - /* now s->rstate == SSL_ST_READ_BODY */ } @@ -656,20 +667,28 @@ again: goto again; /* get another record */ } - /* Check whether this is a repeat, or aged record. - * Don't check if we're listening and this message is - * a ClientHello. They can look as if they're replayed, - * since they arrive from different connections and - * would be dropped unnecessarily. - */ - if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE && - *p == SSL3_MT_CLIENT_HELLO) && - !dtls1_record_replay_check(s, bitmap)) - { - rr->length = 0; - s->packet_length=0; /* dump this record */ - goto again; /* get another record */ - } +#ifndef OPENSSL_NO_SCTP + /* Only do replay check if no SCTP bio */ + if (!BIO_dgram_is_sctp(SSL_get_rbio(s))) + { +#endif + /* Check whether this is a repeat, or aged record. + * Don't check if we're listening and this message is + * a ClientHello. They can look as if they're replayed, + * since they arrive from different connections and + * would be dropped unnecessarily. + */ + if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE && + *p == SSL3_MT_CLIENT_HELLO) && + !dtls1_record_replay_check(s, bitmap)) + { + rr->length = 0; + s->packet_length=0; /* dump this record */ + goto again; /* get another record */ + } +#ifndef OPENSSL_NO_SCTP + } +#endif /* just read a 0 length packet */ if (rr->length == 0) goto again; @@ -697,7 +716,6 @@ again: goto again; /* get another record */ } - dtls1_clear_timeouts(s); /* done waiting */ return(1); } @@ -755,7 +773,17 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) /* Now s->d1->handshake_fragment_len == 0 if type == SSL3_RT_HANDSHAKE. */ +#ifndef OPENSSL_NO_SCTP + /* Continue handshake if it had to be interrupted to read + * app data with SCTP. + */ + if ((!s->in_handshake && SSL_in_init(s)) || + (BIO_dgram_is_sctp(SSL_get_rbio(s)) && + (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK) && + s->s3->in_read_app_data != 2)) +#else if (!s->in_handshake && SSL_in_init(s)) +#endif { /* type == SSL3_RT_APPLICATION_DATA */ i=s->handshake_func(s); @@ -786,6 +814,15 @@ start: item = pqueue_pop(s->d1->buffered_app_data.q); if (item) { +#ifndef OPENSSL_NO_SCTP + /* Restore bio_dgram_sctp_rcvinfo struct */ + if (BIO_dgram_is_sctp(SSL_get_rbio(s))) + { + DTLS1_RECORD_DATA *rdata = (DTLS1_RECORD_DATA *) item->data; + BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SCTP_SET_RCVINFO, sizeof(rdata->recordinfo), &rdata->recordinfo); + } +#endif + dtls1_copy_record(s, item); OPENSSL_free(item->data); @@ -812,6 +849,12 @@ start: } } + if (s->d1->listen && rr->type != SSL3_RT_HANDSHAKE) + { + rr->length = 0; + goto start; + } + /* we now have a packet which can be read and processed */ if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, @@ -868,6 +911,31 @@ start: rr->off=0; } } + +#ifndef OPENSSL_NO_SCTP + /* We were about to renegotiate but had to read + * belated application data first, so retry. + */ + if (BIO_dgram_is_sctp(SSL_get_rbio(s)) && + rr->type == SSL3_RT_APPLICATION_DATA && + (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK)) + { + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + } + + /* We might had to delay a close_notify alert because + * of reordered app data. If there was an alert and there + * is no message to read anymore, finally set shutdown. + */ + if (BIO_dgram_is_sctp(SSL_get_rbio(s)) && + s->d1->shutdown_received && !BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s))) + { + s->shutdown |= SSL_RECEIVED_SHUTDOWN; + return(0); + } +#endif return(n); } @@ -895,6 +963,19 @@ start: dest = s->d1->alert_fragment; dest_len = &s->d1->alert_fragment_len; } +#ifndef OPENSSL_NO_HEARTBEATS + else if (rr->type == TLS1_RT_HEARTBEAT) + { + dtls1_process_heartbeat(s); + + /* Exit and notify application to read again */ + rr->length = 0; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + return(-1); + } +#endif /* else it's a CCS message, or application data or wrong */ else if (rr->type != SSL3_RT_CHANGE_CIPHER_SPEC) { @@ -978,6 +1059,8 @@ start: !(s->s3->flags & SSL3_FLAGS_NO_RENEGOTIATE_CIPHERS) && !s->s3->renegotiate) { + s->d1->handshake_read_seq++; + s->new_session = 1; ssl3_renegotiate(s); if (ssl3_renegotiate_check(s)) { @@ -1039,6 +1122,21 @@ start: s->s3->warn_alert = alert_descr; if (alert_descr == SSL_AD_CLOSE_NOTIFY) { +#ifndef OPENSSL_NO_SCTP + /* With SCTP and streams the socket may deliver app data + * after a close_notify alert. We have to check this + * first so that nothing gets discarded. + */ + if (BIO_dgram_is_sctp(SSL_get_rbio(s)) && + BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s))) + { + s->d1->shutdown_received = 1; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + return -1; + } +#endif s->shutdown |= SSL_RECEIVED_SHUTDOWN; return(0); } @@ -1145,6 +1243,15 @@ start: if (s->version == DTLS1_BAD_VER) s->d1->handshake_read_seq++; +#ifndef OPENSSL_NO_SCTP + /* Remember that a CCS has been received, + * so that an old key of SCTP-Auth can be + * deleted when a CCS is sent. Will be ignored + * if no SCTP is used + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD, 1, NULL); +#endif + goto start; } @@ -1167,6 +1274,9 @@ start: */ if (msg_hdr.type == SSL3_MT_FINISHED) { + if (dtls1_check_timeout_num(s) < 0) + return -1; + dtls1_retransmit_buffered_messages(s); rr->length = 0; goto start; @@ -1184,6 +1294,7 @@ start: #else s->state = s->server ? SSL_ST_ACCEPT : SSL_ST_CONNECT; #endif + s->renegotiate=1; s->new_session=1; } i=s->handshake_func(s); @@ -1280,7 +1391,16 @@ dtls1_write_app_data_bytes(SSL *s, int type, const void *buf_, int len) { int i; - if (SSL_in_init(s) && !s->in_handshake) +#ifndef OPENSSL_NO_SCTP + /* Check if we have to continue an interrupted handshake + * for reading belated app data with SCTP. + */ + if ((SSL_in_init(s) && !s->in_handshake) || + (BIO_dgram_is_sctp(SSL_get_wbio(s)) && + (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK))) +#else + if (SSL_in_init(s) && !s->in_handshake) +#endif { i=s->handshake_func(s); if (i < 0) return(i); @@ -1345,11 +1465,12 @@ int dtls1_write_bytes(SSL *s, int type, const void *buf, int len) OPENSSL_assert(len <= SSL3_RT_MAX_PLAIN_LENGTH); s->rwstate=SSL_NOTHING; - i=do_dtls1_write(s, type, buf, len, 0); + i=do_dtls1_write(s, type, buf, len); return i; } -int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, int create_empty_fragment) +static int do_dtls1_write(SSL *s, int type, const unsigned char *buf, + unsigned int len) { unsigned char *p,*pseq; int i,mac_size,clear=0; @@ -1358,7 +1479,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, SSL3_BUFFER *wb; SSL_SESSION *sess; int bs; - unsigned int len_with_overhead = len + SSL3_RT_DEFAULT_WRITE_OVERHEAD; /* first check if there is a SSL3_BUFFER still being written * out. This will happen with non blocking IO */ @@ -1368,16 +1488,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, return(ssl3_write_pending(s,type,buf,len)); } - if (s->s3->wbuf.len < len_with_overhead) - { - if ((p=OPENSSL_realloc(s->s3->wbuf.buf, len_with_overhead)) == NULL) { - SSLerr(SSL_F_DO_DTLS1_WRITE,ERR_R_MALLOC_FAILURE); - goto err; - } - s->s3->wbuf.buf = p; - s->s3->wbuf.len = len_with_overhead; - } - /* If we have an alert to send, lets send it */ if (s->s3->alert_dispatch) { @@ -1387,7 +1497,7 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, /* if it went, fall through and send more stuff */ } - if (len == 0 && !create_empty_fragment) + if (len == 0) return 0; wr= &(s->s3->wrec); @@ -1408,37 +1518,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, goto err; } - /* DTLS implements explicit IV, so no need for empty fragments */ -#if 0 - /* 'create_empty_fragment' is true only when this function calls itself */ - if (!clear && !create_empty_fragment && !s->s3->empty_fragment_done - && SSL_version(s) != DTLS1_VERSION && SSL_version(s) != DTLS1_BAD_VER) - { - /* countermeasure against known-IV weakness in CBC ciphersuites - * (see http://www.openssl.org/~bodo/tls-cbc.txt) - */ - - if (s->s3->need_empty_fragments && type == SSL3_RT_APPLICATION_DATA) - { - /* recursive function call with 'create_empty_fragment' set; - * this prepares and buffers the data for an empty fragment - * (these 'prefix_len' bytes are sent out later - * together with the actual payload) */ - prefix_len = s->method->do_ssl_write(s, type, buf, 0, 1); - if (prefix_len <= 0) - goto err; - - if (s->s3->wbuf.len < (size_t)prefix_len + SSL3_RT_MAX_PACKET_SIZE) - { - /* insufficient space */ - SSLerr(SSL_F_DO_DTLS1_WRITE, ERR_R_INTERNAL_ERROR); - goto err; - } - } - - s->s3->empty_fragment_done = 1; - } -#endif p = wb->buf + prefix_len; /* write the header */ @@ -1544,14 +1623,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, ssl3_record_sequence_update(&(s->s3->write_sequence[0])); - if (create_empty_fragment) - { - /* we are in a recursive call; - * just return the length, don't write out anything here - */ - return wr->length; - } - /* now let's set up wb */ wb->left = prefix_len + wr->length; wb->offset = 0; @@ -1648,7 +1719,7 @@ int dtls1_dispatch_alert(SSL *s) } #endif - i = do_dtls1_write(s, SSL3_RT_ALERT, &buf[0], sizeof(buf), 0); + i = do_dtls1_write(s, SSL3_RT_ALERT, &buf[0], sizeof(buf)); if (i <= 0) { s->s3->alert_dispatch=1; @@ -1791,10 +1862,3 @@ dtls1_reset_seq_numbers(SSL *s, int rw) memset(seq, 0x00, seq_bytes); } - - -static void -dtls1_clear_timeouts(SSL *s) - { - memset(&(s->d1->timeout), 0x00, sizeof(struct dtls1_timeout_st)); - } diff --git a/app/openssl/ssl/d1_srtp.c b/app/openssl/ssl/d1_srtp.c new file mode 100644 index 00000000..ab9c4192 --- /dev/null +++ b/app/openssl/ssl/d1_srtp.c @@ -0,0 +1,494 @@ +/* ssl/t1_lib.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ +/* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* + DTLS code by Eric Rescorla + + Copyright (C) 2006, Network Resonance, Inc. + Copyright (C) 2011, RTFM, Inc. +*/ + +#include +#include +#include "ssl_locl.h" + +#ifndef OPENSSL_NO_SRTP + +#include "srtp.h" + + +static SRTP_PROTECTION_PROFILE srtp_known_profiles[]= + { + { + "SRTP_AES128_CM_SHA1_80", + SRTP_AES128_CM_SHA1_80, + }, + { + "SRTP_AES128_CM_SHA1_32", + SRTP_AES128_CM_SHA1_32, + }, +#if 0 + { + "SRTP_NULL_SHA1_80", + SRTP_NULL_SHA1_80, + }, + { + "SRTP_NULL_SHA1_32", + SRTP_NULL_SHA1_32, + }, +#endif + {0} + }; + +static int find_profile_by_name(char *profile_name, + SRTP_PROTECTION_PROFILE **pptr,unsigned len) + { + SRTP_PROTECTION_PROFILE *p; + + p=srtp_known_profiles; + while(p->name) + { + if((len == strlen(p->name)) && !strncmp(p->name,profile_name, + len)) + { + *pptr=p; + return 0; + } + + p++; + } + + return 1; + } + +static int find_profile_by_num(unsigned profile_num, + SRTP_PROTECTION_PROFILE **pptr) + { + SRTP_PROTECTION_PROFILE *p; + + p=srtp_known_profiles; + while(p->name) + { + if(p->id == profile_num) + { + *pptr=p; + return 0; + } + p++; + } + + return 1; + } + +static int ssl_ctx_make_profiles(const char *profiles_string,STACK_OF(SRTP_PROTECTION_PROFILE) **out) + { + STACK_OF(SRTP_PROTECTION_PROFILE) *profiles; + + char *col; + char *ptr=(char *)profiles_string; + + SRTP_PROTECTION_PROFILE *p; + + if(!(profiles=sk_SRTP_PROTECTION_PROFILE_new_null())) + { + SSLerr(SSL_F_SSL_CTX_MAKE_PROFILES, SSL_R_SRTP_COULD_NOT_ALLOCATE_PROFILES); + return 1; + } + + do + { + col=strchr(ptr,':'); + + if(!find_profile_by_name(ptr,&p, + col ? col-ptr : (int)strlen(ptr))) + { + sk_SRTP_PROTECTION_PROFILE_push(profiles,p); + } + else + { + SSLerr(SSL_F_SSL_CTX_MAKE_PROFILES,SSL_R_SRTP_UNKNOWN_PROTECTION_PROFILE); + return 1; + } + + if(col) ptr=col+1; + } while (col); + + *out=profiles; + + return 0; + } + +int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx,const char *profiles) + { + return ssl_ctx_make_profiles(profiles,&ctx->srtp_profiles); + } + +int SSL_set_tlsext_use_srtp(SSL *s,const char *profiles) + { + return ssl_ctx_make_profiles(profiles,&s->srtp_profiles); + } + + +STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *s) + { + if(s != NULL) + { + if(s->srtp_profiles != NULL) + { + return s->srtp_profiles; + } + else if((s->ctx != NULL) && + (s->ctx->srtp_profiles != NULL)) + { + return s->ctx->srtp_profiles; + } + } + + return NULL; + } + +SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s) + { + return s->srtp_profile; + } + +/* Note: this function returns 0 length if there are no + profiles specified */ +int ssl_add_clienthello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen) + { + int ct=0; + int i; + STACK_OF(SRTP_PROTECTION_PROFILE) *clnt=0; + SRTP_PROTECTION_PROFILE *prof; + + clnt=SSL_get_srtp_profiles(s); + ct=sk_SRTP_PROTECTION_PROFILE_num(clnt); /* -1 if clnt == 0 */ + + if(p) + { + if(ct==0) + { + SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT,SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST); + return 1; + } + + if((2 + ct*2 + 1) > maxlen) + { + SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT,SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG); + return 1; + } + + /* Add the length */ + s2n(ct * 2, p); + for(i=0;iid,p); + } + + /* Add an empty use_mki value */ + *p++ = 0; + } + + *len=2 + ct*2 + 1; + + return 0; + } + + +int ssl_parse_clienthello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al) + { + SRTP_PROTECTION_PROFILE *cprof,*sprof; + STACK_OF(SRTP_PROTECTION_PROFILE) *clnt=0,*srvr; + int ct; + int mki_len; + int i,j; + int id; + int ret; + + /* Length value + the MKI length */ + if(len < 3) + { + SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + /* Pull off the length of the cipher suite list */ + n2s(d, ct); + len -= 2; + + /* Check that it is even */ + if(ct%2) + { + SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + /* Check that lengths are consistent */ + if(len < (ct + 1)) + { + SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + + clnt=sk_SRTP_PROTECTION_PROFILE_new_null(); + + while(ct) + { + n2s(d,id); + ct-=2; + len-=2; + + if(!find_profile_by_num(id,&cprof)) + { + sk_SRTP_PROTECTION_PROFILE_push(clnt,cprof); + } + else + { + ; /* Ignore */ + } + } + + /* Now extract the MKI value as a sanity check, but discard it for now */ + mki_len = *d; + d++; len--; + + if (mki_len != len) + { + SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_MKI_VALUE); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + srvr=SSL_get_srtp_profiles(s); + + /* Pick our most preferred profile. If no profiles have been + configured then the outer loop doesn't run + (sk_SRTP_PROTECTION_PROFILE_num() = -1) + and so we just return without doing anything */ + for(i=0;iid==sprof->id) + { + s->srtp_profile=sprof; + *al=0; + ret=0; + goto done; + } + } + } + + ret=0; + +done: + if(clnt) sk_SRTP_PROTECTION_PROFILE_free(clnt); + + return ret; + } + +int ssl_add_serverhello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen) + { + if(p) + { + if(maxlen < 5) + { + SSLerr(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT,SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG); + return 1; + } + + if(s->srtp_profile==0) + { + SSLerr(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT,SSL_R_USE_SRTP_NOT_NEGOTIATED); + return 1; + } + s2n(2, p); + s2n(s->srtp_profile->id,p); + *p++ = 0; + } + *len=5; + + return 0; + } + + +int ssl_parse_serverhello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al) + { + unsigned id; + int i; + int ct; + + STACK_OF(SRTP_PROTECTION_PROFILE) *clnt; + SRTP_PROTECTION_PROFILE *prof; + + if(len!=5) + { + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + n2s(d, ct); + if(ct!=2) + { + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + n2s(d,id); + if (*d) /* Must be no MKI, since we never offer one */ + { + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_MKI_VALUE); + *al=SSL_AD_ILLEGAL_PARAMETER; + return 1; + } + + clnt=SSL_get_srtp_profiles(s); + + /* Throw an error if the server gave us an unsolicited extension */ + if (clnt == NULL) + { + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_NO_SRTP_PROFILES); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + /* Check to see if the server gave us something we support + (and presumably offered) + */ + for(i=0;iid == id) + { + s->srtp_profile=prof; + *al=0; + return 0; + } + } + + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST); + *al=SSL_AD_DECODE_ERROR; + return 1; + } + + +#endif diff --git a/app/openssl/ssl/d1_srvr.c b/app/openssl/ssl/d1_srvr.c index a6a4c87e..09f47627 100644 --- a/app/openssl/ssl/d1_srvr.c +++ b/app/openssl/ssl/d1_srvr.c @@ -151,6 +151,10 @@ int dtls1_accept(SSL *s) int ret= -1; int new_state,state,skip=0; int listen; +#ifndef OPENSSL_NO_SCTP + unsigned char sctpauthkey[64]; + char labelbuffer[sizeof(DTLS1_SCTP_AUTH_LABEL)]; +#endif RAND_add(&Time,sizeof(Time),0); ERR_clear_error(); @@ -168,6 +172,13 @@ int dtls1_accept(SSL *s) if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s); s->d1->listen = listen; +#ifndef OPENSSL_NO_SCTP + /* Notify SCTP BIO socket to enter handshake + * mode and prevent stream identifier other + * than 0. Will be ignored if no SCTP is used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL); +#endif if (s->cert == NULL) { @@ -175,6 +186,19 @@ int dtls1_accept(SSL *s) return(-1); } +#ifndef OPENSSL_NO_HEARTBEATS + /* If we're awaiting a HeartbeatResponse, pretend we + * already got and don't await it anymore, because + * Heartbeats don't make sense during handshakes anyway. + */ + if (s->tlsext_hb_pending) + { + dtls1_stop_timer(s); + s->tlsext_hb_pending = 0; + s->tlsext_hb_seq++; + } +#endif + for (;;) { state=s->state; @@ -182,7 +206,7 @@ int dtls1_accept(SSL *s) switch (s->state) { case SSL_ST_RENEGOTIATE: - s->new_session=1; + s->renegotiate=1; /* s->state=SSL_ST_ACCEPT; */ case SSL_ST_BEFORE: @@ -227,8 +251,12 @@ int dtls1_accept(SSL *s) { /* Ok, we now need to push on a buffering BIO so that * the output is sent in a way that TCP likes :-) + * ...but not with SCTP :-) */ - if (!ssl_init_wbio_buffer(s,1)) { ret= -1; goto end; } +#ifndef OPENSSL_NO_SCTP + if (!BIO_dgram_is_sctp(SSL_get_wbio(s))) +#endif + if (!ssl_init_wbio_buffer(s,1)) { ret= -1; goto end; } ssl3_init_finished_mac(s); s->state=SSL3_ST_SR_CLNT_HELLO_A; @@ -248,10 +276,11 @@ int dtls1_accept(SSL *s) case SSL3_ST_SW_HELLO_REQ_B: s->shutdown=0; + dtls1_clear_record_buffer(s); dtls1_start_timer(s); ret=dtls1_send_hello_request(s); if (ret <= 0) goto end; - s->s3->tmp.next_state=SSL3_ST_SW_HELLO_REQ_C; + s->s3->tmp.next_state=SSL3_ST_SR_CLNT_HELLO_A; s->state=SSL3_ST_SW_FLUSH; s->init_num=0; @@ -313,25 +342,75 @@ int dtls1_accept(SSL *s) ssl3_init_finished_mac(s); break; +#ifndef OPENSSL_NO_SCTP + case DTLS1_SCTP_ST_SR_READ_SOCK: + + if (BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s))) + { + s->s3->in_read_app_data=2; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + ret = -1; + goto end; + } + + s->state=SSL3_ST_SR_FINISHED_A; + break; + + case DTLS1_SCTP_ST_SW_WRITE_SOCK: + ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s)); + if (ret < 0) goto end; + + if (ret == 0) + { + if (s->d1->next_state != SSL_ST_OK) + { + s->s3->in_read_app_data=2; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + ret = -1; + goto end; + } + } + + s->state=s->d1->next_state; + break; +#endif + case SSL3_ST_SW_SRVR_HELLO_A: case SSL3_ST_SW_SRVR_HELLO_B: - s->new_session = 2; + s->renegotiate = 2; dtls1_start_timer(s); ret=dtls1_send_server_hello(s); if (ret <= 0) goto end; -#ifndef OPENSSL_NO_TLSEXT if (s->hit) { +#ifndef OPENSSL_NO_SCTP + /* Add new shared key for SCTP-Auth, + * will be ignored if no SCTP used. + */ + snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL), + DTLS1_SCTP_AUTH_LABEL); + + SSL_export_keying_material(s, sctpauthkey, + sizeof(sctpauthkey), labelbuffer, + sizeof(labelbuffer), NULL, 0, 0); + + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY, + sizeof(sctpauthkey), sctpauthkey); +#endif +#ifndef OPENSSL_NO_TLSEXT if (s->tlsext_ticket_expected) s->state=SSL3_ST_SW_SESSION_TICKET_A; else s->state=SSL3_ST_SW_CHANGE_A; - } #else - if (s->hit) - s->state=SSL3_ST_SW_CHANGE_A; + s->state=SSL3_ST_SW_CHANGE_A; #endif + } else s->state=SSL3_ST_SW_CERT_A; s->init_num=0; @@ -392,7 +471,7 @@ int dtls1_accept(SSL *s) /* PSK: send ServerKeyExchange if PSK identity * hint if provided */ #ifndef OPENSSL_NO_PSK - || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint) + || ((alg_k & SSL_kPSK) && s->session->psk_identity_hint) #endif || (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd)) || (alg_k & SSL_kEECDH) @@ -441,6 +520,13 @@ int dtls1_accept(SSL *s) skip=1; s->s3->tmp.cert_request=0; s->state=SSL3_ST_SW_SRVR_DONE_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = SSL3_ST_SW_SRVR_DONE_A; + s->state = DTLS1_SCTP_ST_SW_WRITE_SOCK; + } +#endif } else { @@ -450,9 +536,23 @@ int dtls1_accept(SSL *s) if (ret <= 0) goto end; #ifndef NETSCAPE_HANG_BUG s->state=SSL3_ST_SW_SRVR_DONE_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = SSL3_ST_SW_SRVR_DONE_A; + s->state = DTLS1_SCTP_ST_SW_WRITE_SOCK; + } +#endif #else s->state=SSL3_ST_SW_FLUSH; s->s3->tmp.next_state=SSL3_ST_SR_CERT_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = s->s3->tmp.next_state; + s->s3->tmp.next_state=DTLS1_SCTP_ST_SW_WRITE_SOCK; + } +#endif #endif s->init_num=0; } @@ -472,6 +572,13 @@ int dtls1_accept(SSL *s) s->rwstate=SSL_WRITING; if (BIO_flush(s->wbio) <= 0) { + /* If the write error was fatal, stop trying */ + if (!BIO_should_retry(s->wbio)) + { + s->rwstate=SSL_NOTHING; + s->state=s->s3->tmp.next_state; + } + ret= -1; goto end; } @@ -485,15 +592,16 @@ int dtls1_accept(SSL *s) ret = ssl3_check_client_hello(s); if (ret <= 0) goto end; - dtls1_stop_timer(s); if (ret == 2) + { + dtls1_stop_timer(s); s->state = SSL3_ST_SR_CLNT_HELLO_C; + } else { /* could be sent for a DH cert, even if we * have not asked for it :-) */ ret=ssl3_get_client_certificate(s); if (ret <= 0) goto end; - dtls1_stop_timer(s); s->init_num=0; s->state=SSL3_ST_SR_KEY_EXCH_A; } @@ -503,7 +611,21 @@ int dtls1_accept(SSL *s) case SSL3_ST_SR_KEY_EXCH_B: ret=ssl3_get_client_key_exchange(s); if (ret <= 0) goto end; - dtls1_stop_timer(s); +#ifndef OPENSSL_NO_SCTP + /* Add new shared key for SCTP-Auth, + * will be ignored if no SCTP used. + */ + snprintf((char *) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL), + DTLS1_SCTP_AUTH_LABEL); + + SSL_export_keying_material(s, sctpauthkey, + sizeof(sctpauthkey), labelbuffer, + sizeof(labelbuffer), NULL, 0, 0); + + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY, + sizeof(sctpauthkey), sctpauthkey); +#endif + s->state=SSL3_ST_SR_CERT_VRFY_A; s->init_num=0; @@ -540,9 +662,13 @@ int dtls1_accept(SSL *s) /* we should decide if we expected this one */ ret=ssl3_get_cert_verify(s); if (ret <= 0) goto end; - dtls1_stop_timer(s); - - s->state=SSL3_ST_SR_FINISHED_A; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s)) && + state == SSL_ST_RENEGOTIATE) + s->state=DTLS1_SCTP_ST_SR_READ_SOCK; + else +#endif + s->state=SSL3_ST_SR_FINISHED_A; s->init_num=0; break; @@ -594,6 +720,17 @@ int dtls1_accept(SSL *s) SSL3_ST_SW_CHANGE_A,SSL3_ST_SW_CHANGE_B); if (ret <= 0) goto end; + +#ifndef OPENSSL_NO_SCTP + if (!s->hit) + { + /* Change to new shared key of SCTP-Auth, + * will be ignored if no SCTP used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL); + } +#endif + s->state=SSL3_ST_SW_FINISHED_A; s->init_num=0; @@ -616,9 +753,27 @@ int dtls1_accept(SSL *s) if (ret <= 0) goto end; s->state=SSL3_ST_SW_FLUSH; if (s->hit) + { s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A; + +#ifndef OPENSSL_NO_SCTP + /* Change to new shared key of SCTP-Auth, + * will be ignored if no SCTP used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL); +#endif + } else + { s->s3->tmp.next_state=SSL_ST_OK; +#ifndef OPENSSL_NO_SCTP + if (BIO_dgram_is_sctp(SSL_get_wbio(s))) + { + s->d1->next_state = s->s3->tmp.next_state; + s->s3->tmp.next_state=DTLS1_SCTP_ST_SW_WRITE_SOCK; + } +#endif + } s->init_num=0; break; @@ -636,11 +791,9 @@ int dtls1_accept(SSL *s) s->init_num=0; - if (s->new_session == 2) /* skipped if we just sent a HelloRequest */ + if (s->renegotiate == 2) /* skipped if we just sent a HelloRequest */ { - /* actually not necessarily a 'new' session unless - * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is set */ - + s->renegotiate=0; s->new_session=0; ssl_update_cache(s,SSL_SESS_CACHE_SERVER); @@ -692,6 +845,14 @@ end: /* BIO_flush(s->wbio); */ s->in_handshake--; +#ifndef OPENSSL_NO_SCTP + /* Notify SCTP BIO socket to leave handshake + * mode and prevent stream identifier other + * than 0. Will be ignored if no SCTP is used. + */ + BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL); +#endif + if (cb != NULL) cb(s,SSL_CB_ACCEPT_EXIT,ret); return(ret); @@ -764,15 +925,13 @@ int dtls1_send_server_hello(SSL *s) unsigned char *p,*d; int i; unsigned int sl; - unsigned long l,Time; + unsigned long l; if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { buf=(unsigned char *)s->init_buf->data; p=s->s3->server_random; - Time=(unsigned long)time(NULL); /* Time */ - l2n(Time,p); - RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-sizeof(Time)); + ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE); /* Do the message type and length last */ d=p= &(buf[DTLS1_HM_HEADER_LENGTH]); @@ -1129,7 +1288,7 @@ int dtls1_send_server_key_exchange(SSL *s) if (type & SSL_kPSK) { /* reserve size for record length and PSK identity hint*/ - n+=2+strlen(s->ctx->psk_identity_hint); + n+=2+strlen(s->session->psk_identity_hint); } else #endif /* !OPENSSL_NO_PSK */ @@ -1147,7 +1306,7 @@ int dtls1_send_server_key_exchange(SSL *s) if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) { - if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher)) + if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher, NULL)) == NULL) { al=SSL_AD_DECODE_ERROR; @@ -1205,9 +1364,9 @@ int dtls1_send_server_key_exchange(SSL *s) if (type & SSL_kPSK) { /* copy PSK identity hint */ - s2n(strlen(s->ctx->psk_identity_hint), p); - strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint)); - p+=strlen(s->ctx->psk_identity_hint); + s2n(strlen(s->session->psk_identity_hint), p); + strncpy((char *)p, s->session->psk_identity_hint, strlen(s->session->psk_identity_hint)); + p+=strlen(s->session->psk_identity_hint); } #endif @@ -1271,7 +1430,7 @@ int dtls1_send_server_key_exchange(SSL *s) EVP_SignInit_ex(&md_ctx,EVP_ecdsa(), NULL); EVP_SignUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); EVP_SignUpdate(&md_ctx,&(s->s3->server_random[0]),SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx,&(d[4]),n); + EVP_SignUpdate(&md_ctx,&(d[DTLS1_HM_HEADER_LENGTH]),n); if (!EVP_SignFinal(&md_ctx,&(p[2]), (unsigned int *)&i,pkey)) { diff --git a/app/openssl/ssl/dtls1.h b/app/openssl/ssl/dtls1.h index 2900d1d8..e65d5011 100644 --- a/app/openssl/ssl/dtls1.h +++ b/app/openssl/ssl/dtls1.h @@ -57,8 +57,8 @@ * */ -#ifndef HEADER_DTLS1_H -#define HEADER_DTLS1_H +#ifndef HEADER_DTLS1_H +#define HEADER_DTLS1_H #include #include @@ -72,8 +72,12 @@ #elif defined(OPENSSL_SYS_NETWARE) && !defined(_WINSOCK2API_) #include #else +#if defined(OPENSSL_SYS_VXWORKS) +#include +#else #include #endif +#endif #ifdef __cplusplus extern "C" { @@ -105,6 +109,11 @@ extern "C" { #define DTLS1_AL_HEADER_LENGTH 2 #endif +#ifndef OPENSSL_NO_SSL_INTERN + +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_AUTH_LABEL "EXPORTER_DTLS_OVER_SCTP" +#endif typedef struct dtls1_bitmap_st { @@ -227,7 +236,7 @@ typedef struct dtls1_state_st struct dtls1_timeout_st timeout; - /* Indicates when the last handshake msg sent will timeout */ + /* Indicates when the last handshake msg or heartbeat sent will timeout */ struct timeval next_timeout; /* Timeout duration */ @@ -243,6 +252,13 @@ typedef struct dtls1_state_st unsigned int retransmitting; unsigned int change_cipher_spec_ok; +#ifndef OPENSSL_NO_SCTP + /* used when SSL_ST_XX_FLUSH is entered */ + int next_state; + + int shutdown_received; +#endif + } DTLS1_STATE; typedef struct dtls1_record_data_st @@ -251,8 +267,12 @@ typedef struct dtls1_record_data_st unsigned int packet_length; SSL3_BUFFER rbuf; SSL3_RECORD rrec; +#ifndef OPENSSL_NO_SCTP + struct bio_dgram_sctp_rcvinfo recordinfo; +#endif } DTLS1_RECORD_DATA; +#endif /* Timeout multipliers (timeout slice is defined in apps/timeouts.h */ #define DTLS1_TMO_READ_COUNT 2 diff --git a/app/openssl/ssl/kssl.c b/app/openssl/ssl/kssl.c index b820e374..fd7c67bb 100644 --- a/app/openssl/ssl/kssl.c +++ b/app/openssl/ssl/kssl.c @@ -2194,6 +2194,22 @@ krb5_error_code kssl_build_principal_2( return ENOMEM; } +void SSL_set0_kssl_ctx(SSL *s, KSSL_CTX *kctx) + { + s->kssl_ctx = kctx; + } + +KSSL_CTX * SSL_get0_kssl_ctx(SSL *s) + { + return s->kssl_ctx; + } + +char *kssl_ctx_get0_client_princ(KSSL_CTX *kctx) + { + if (kctx) + return kctx->client_princ; + return NULL; + } #else /* !OPENSSL_NO_KRB5 */ diff --git a/app/openssl/ssl/kssl.h b/app/openssl/ssl/kssl.h index a3d20e1c..e4df8430 100644 --- a/app/openssl/ssl/kssl.h +++ b/app/openssl/ssl/kssl.h @@ -70,6 +70,15 @@ #include #include #include +#ifdef OPENSSL_SYS_WIN32 +/* These can sometimes get redefined indirectly by krb5 header files + * after they get undefed in ossl_typ.h + */ +#undef X509_NAME +#undef X509_EXTENSIONS +#undef OCSP_REQUEST +#undef OCSP_RESPONSE +#endif #ifdef __cplusplus extern "C" { @@ -172,6 +181,10 @@ krb5_error_code kssl_check_authent(KSSL_CTX *kssl_ctx, krb5_data *authentp, krb5_timestamp *atimep, KSSL_ERR *kssl_err); unsigned char *kssl_skip_confound(krb5_enctype enctype, unsigned char *authn); +void SSL_set0_kssl_ctx(SSL *s, KSSL_CTX *kctx); +KSSL_CTX * SSL_get0_kssl_ctx(SSL *s); +char *kssl_ctx_get0_client_princ(KSSL_CTX *kctx); + #ifdef __cplusplus } #endif diff --git a/app/openssl/ssl/s23_clnt.c b/app/openssl/ssl/s23_clnt.c index f41fe3ab..2bc92141 100644 --- a/app/openssl/ssl/s23_clnt.c +++ b/app/openssl/ssl/s23_clnt.c @@ -129,6 +129,10 @@ static const SSL_METHOD *ssl23_get_client_method(int ver) return(SSLv3_client_method()); else if (ver == TLS1_VERSION) return(TLSv1_client_method()); + else if (ver == TLS1_1_VERSION) + return(TLSv1_1_client_method()); + else if (ver == TLS1_2_VERSION) + return(TLSv1_2_client_method()); else return(NULL); } @@ -265,12 +269,35 @@ static int ssl23_no_ssl2_ciphers(SSL *s) return 1; } +/* Fill a ClientRandom or ServerRandom field of length len. Returns <= 0 + * on failure, 1 on success. */ +int ssl_fill_hello_random(SSL *s, int server, unsigned char *result, int len) + { + int send_time = 0; + + if (len < 4) + return 0; + if (server) + send_time = (s->mode & SSL_MODE_SEND_SERVERHELLO_TIME) != 0; + else + send_time = (s->mode & SSL_MODE_SEND_CLIENTHELLO_TIME) != 0; + if (send_time) + { + unsigned long Time = (unsigned long)time(NULL); + unsigned char *p = result; + l2n(Time, p); + return RAND_pseudo_bytes(p, len-4); + } + else + return RAND_pseudo_bytes(result, len); + } + static int ssl23_client_hello(SSL *s) { unsigned char *buf; unsigned char *p,*d; int i,ch_len; - unsigned long Time,l; + unsigned long l; int ssl2_compat; int version = 0, version_major, version_minor; #ifndef OPENSSL_NO_COMP @@ -278,24 +305,51 @@ static int ssl23_client_hello(SSL *s) SSL_COMP *comp; #endif int ret; + unsigned long mask, options = s->options; - ssl2_compat = (s->options & SSL_OP_NO_SSLv2) ? 0 : 1; + ssl2_compat = (options & SSL_OP_NO_SSLv2) ? 0 : 1; if (ssl2_compat && ssl23_no_ssl2_ciphers(s)) ssl2_compat = 0; - if (!(s->options & SSL_OP_NO_TLSv1)) - { + /* + * SSL_OP_NO_X disables all protocols above X *if* there are + * some protocols below X enabled. This is required in order + * to maintain "version capability" vector contiguous. So + * that if application wants to disable TLS1.0 in favour of + * TLS1>=1, it would be insufficient to pass SSL_NO_TLSv1, the + * answer is SSL_OP_NO_TLSv1|SSL_OP_NO_SSLv3|SSL_OP_NO_SSLv2. + */ + mask = SSL_OP_NO_TLSv1_1|SSL_OP_NO_TLSv1 +#if !defined(OPENSSL_NO_SSL3) + |SSL_OP_NO_SSLv3 +#endif +#if !defined(OPENSSL_NO_SSL2) + |(ssl2_compat?SSL_OP_NO_SSLv2:0) +#endif + ; +#if !defined(OPENSSL_NO_TLS1_2_CLIENT) + version = TLS1_2_VERSION; + + if ((options & SSL_OP_NO_TLSv1_2) && (options & mask) != mask) + version = TLS1_1_VERSION; +#else + version = TLS1_1_VERSION; +#endif + mask &= ~SSL_OP_NO_TLSv1_1; + if ((options & SSL_OP_NO_TLSv1_1) && (options & mask) != mask) version = TLS1_VERSION; - } - else if (!(s->options & SSL_OP_NO_SSLv3)) - { + mask &= ~SSL_OP_NO_TLSv1; +#if !defined(OPENSSL_NO_SSL3) + if ((options & SSL_OP_NO_TLSv1) && (options & mask) != mask) version = SSL3_VERSION; - } - else if (!(s->options & SSL_OP_NO_SSLv2)) - { + mask &= ~SSL_OP_NO_SSLv3; +#endif +#if !defined(OPENSSL_NO_SSL2) + if ((options & SSL_OP_NO_SSLv3) && (options & mask) != mask) version = SSL2_VERSION; - } +#endif + #ifndef OPENSSL_NO_TLSEXT if (version != SSL2_VERSION) { @@ -324,16 +378,32 @@ static int ssl23_client_hello(SSL *s) #endif p=s->s3->client_random; - Time=(unsigned long)time(NULL); /* Time */ - l2n(Time,p); - if (RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-4) <= 0) + if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0) return -1; - if (version == TLS1_VERSION) + if (version == TLS1_2_VERSION) + { + version_major = TLS1_2_VERSION_MAJOR; + version_minor = TLS1_2_VERSION_MINOR; + } + else if (version == TLS1_1_VERSION) + { + version_major = TLS1_1_VERSION_MAJOR; + version_minor = TLS1_1_VERSION_MINOR; + } + else if (version == TLS1_VERSION) { version_major = TLS1_VERSION_MAJOR; version_minor = TLS1_VERSION_MINOR; } +#ifdef OPENSSL_FIPS + else if(FIPS_mode()) + { + SSLerr(SSL_F_SSL23_CLIENT_HELLO, + SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE); + return -1; + } +#endif else if (version == SSL3_VERSION) { version_major = SSL3_VERSION_MAJOR; @@ -437,6 +507,15 @@ static int ssl23_client_hello(SSL *s) SSLerr(SSL_F_SSL23_CLIENT_HELLO,SSL_R_NO_CIPHERS_AVAILABLE); return -1; } +#ifdef OPENSSL_MAX_TLS1_2_CIPHER_LENGTH + /* Some servers hang if client hello > 256 bytes + * as hack workaround chop number of supported ciphers + * to keep it well below this if we use TLS v1.2 + */ + if (TLS1_get_version(s) >= TLS1_2_VERSION + && i > OPENSSL_MAX_TLS1_2_CIPHER_LENGTH) + i = OPENSSL_MAX_TLS1_2_CIPHER_LENGTH & ~1; +#endif s2n(i,p); p+=i; @@ -491,8 +570,13 @@ static int ssl23_client_hello(SSL *s) d=buf; *(d++) = SSL3_RT_HANDSHAKE; *(d++) = version_major; - *(d++) = version_minor; /* arguably we should send the *lowest* suported version here - * (indicating, e.g., TLS 1.0 in "SSL 3.0 format") */ + /* Some servers hang if we use long client hellos + * and a record number > TLS 1.0. + */ + if (TLS1_get_client_version(s) > TLS1_VERSION) + *(d++) = 1; + else + *(d++) = version_minor; s2n((int)l,d); /* number of bytes to write */ @@ -608,7 +692,7 @@ static int ssl23_get_server_hello(SSL *s) #endif } else if (p[1] == SSL3_VERSION_MAJOR && - (p[2] == SSL3_VERSION_MINOR || p[2] == TLS1_VERSION_MINOR) && + p[2] <= TLS1_2_VERSION_MINOR && ((p[0] == SSL3_RT_HANDSHAKE && p[5] == SSL3_MT_SERVER_HELLO) || (p[0] == SSL3_RT_ALERT && p[3] == 0 && p[4] == 2))) { @@ -617,6 +701,14 @@ static int ssl23_get_server_hello(SSL *s) if ((p[2] == SSL3_VERSION_MINOR) && !(s->options & SSL_OP_NO_SSLv3)) { +#ifdef OPENSSL_FIPS + if(FIPS_mode()) + { + SSLerr(SSL_F_SSL23_GET_SERVER_HELLO, + SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE); + goto err; + } +#endif s->version=SSL3_VERSION; s->method=SSLv3_client_method(); } @@ -626,6 +718,18 @@ static int ssl23_get_server_hello(SSL *s) s->version=TLS1_VERSION; s->method=TLSv1_client_method(); } + else if ((p[2] == TLS1_1_VERSION_MINOR) && + !(s->options & SSL_OP_NO_TLSv1_1)) + { + s->version=TLS1_1_VERSION; + s->method=TLSv1_1_client_method(); + } + else if ((p[2] == TLS1_2_VERSION_MINOR) && + !(s->options & SSL_OP_NO_TLSv1_2)) + { + s->version=TLS1_2_VERSION; + s->method=TLSv1_2_client_method(); + } else { SSLerr(SSL_F_SSL23_GET_SERVER_HELLO,SSL_R_UNSUPPORTED_PROTOCOL); diff --git a/app/openssl/ssl/s23_meth.c b/app/openssl/ssl/s23_meth.c index c6099efc..40eae0f0 100644 --- a/app/openssl/ssl/s23_meth.c +++ b/app/openssl/ssl/s23_meth.c @@ -76,6 +76,10 @@ static const SSL_METHOD *ssl23_get_method(int ver) #ifndef OPENSSL_NO_TLS1 if (ver == TLS1_VERSION) return(TLSv1_method()); + else if (ver == TLS1_1_VERSION) + return(TLSv1_1_method()); + else if (ver == TLS1_2_VERSION) + return(TLSv1_2_method()); else #endif return(NULL); diff --git a/app/openssl/ssl/s23_srvr.c b/app/openssl/ssl/s23_srvr.c index e22879c8..48778490 100644 --- a/app/openssl/ssl/s23_srvr.c +++ b/app/openssl/ssl/s23_srvr.c @@ -115,6 +115,9 @@ #include #include #include +#ifdef OPENSSL_FIPS +#include +#endif static const SSL_METHOD *ssl23_get_server_method(int ver); int ssl23_get_client_hello(SSL *s); @@ -128,6 +131,10 @@ static const SSL_METHOD *ssl23_get_server_method(int ver) return(SSLv3_server_method()); else if (ver == TLS1_VERSION) return(TLSv1_server_method()); + else if (ver == TLS1_1_VERSION) + return(TLSv1_1_server_method()); + else if (ver == TLS1_2_VERSION) + return(TLSv1_2_server_method()); else return(NULL); } @@ -283,7 +290,20 @@ int ssl23_get_client_hello(SSL *s) /* SSLv3/TLSv1 */ if (p[4] >= TLS1_VERSION_MINOR) { - if (!(s->options & SSL_OP_NO_TLSv1)) + if (p[4] >= TLS1_2_VERSION_MINOR && + !(s->options & SSL_OP_NO_TLSv1_2)) + { + s->version=TLS1_2_VERSION; + s->state=SSL23_ST_SR_CLNT_HELLO_B; + } + else if (p[4] >= TLS1_1_VERSION_MINOR && + !(s->options & SSL_OP_NO_TLSv1_1)) + { + s->version=TLS1_1_VERSION; + /* type=2; */ /* done later to survive restarts */ + s->state=SSL23_ST_SR_CLNT_HELLO_B; + } + else if (!(s->options & SSL_OP_NO_TLSv1)) { s->version=TLS1_VERSION; /* type=2; */ /* done later to survive restarts */ @@ -350,7 +370,19 @@ int ssl23_get_client_hello(SSL *s) v[1]=p[10]; /* minor version according to client_version */ if (v[1] >= TLS1_VERSION_MINOR) { - if (!(s->options & SSL_OP_NO_TLSv1)) + if (v[1] >= TLS1_2_VERSION_MINOR && + !(s->options & SSL_OP_NO_TLSv1_2)) + { + s->version=TLS1_2_VERSION; + type=3; + } + else if (v[1] >= TLS1_1_VERSION_MINOR && + !(s->options & SSL_OP_NO_TLSv1_1)) + { + s->version=TLS1_1_VERSION; + type=3; + } + else if (!(s->options & SSL_OP_NO_TLSv1)) { s->version=TLS1_VERSION; type=3; @@ -393,6 +425,15 @@ int ssl23_get_client_hello(SSL *s) } } +#ifdef OPENSSL_FIPS + if (FIPS_mode() && (s->version < TLS1_VERSION)) + { + SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO, + SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE); + goto err; + } +#endif + if (s->state == SSL23_ST_SR_CLNT_HELLO_B) { /* we have SSLv3/TLSv1 in an SSLv2 header @@ -403,13 +444,8 @@ int ssl23_get_client_hello(SSL *s) v[0] = p[3]; /* == SSL3_VERSION_MAJOR */ v[1] = p[4]; -/* The SSL2 protocol allows n to be larger, just pick - * a reasonable buffer size. */ -#if SSL3_RT_DEFAULT_PACKET_SIZE < 1024*4 - SSL3_RT_DEFAULT_WRITE_OVERHEAD -#error "SSL3_RT_DEFAULT_PACKET_SIZE is too small." -#endif n=((p[0]&0x7f)<<8)|p[1]; - if (n > SSL3_RT_DEFAULT_PACKET_SIZE - 2) + if (n > (1024*4)) { SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO,SSL_R_RECORD_TOO_LARGE); goto err; @@ -572,8 +608,11 @@ int ssl23_get_client_hello(SSL *s) s->s3->rbuf.left=0; s->s3->rbuf.offset=0; } - - if (s->version == TLS1_VERSION) + if (s->version == TLS1_2_VERSION) + s->method = TLSv1_2_server_method(); + else if (s->version == TLS1_1_VERSION) + s->method = TLSv1_1_server_method(); + else if (s->version == TLS1_VERSION) s->method = TLSv1_server_method(); else s->method = SSLv3_server_method(); diff --git a/app/openssl/ssl/s2_clnt.c b/app/openssl/ssl/s2_clnt.c index 00ac158f..03b6cf96 100644 --- a/app/openssl/ssl/s2_clnt.c +++ b/app/openssl/ssl/s2_clnt.c @@ -359,12 +359,14 @@ static int get_server_hello(SSL *s) SSL_R_PEER_ERROR); return(-1); } -#ifdef __APPLE_CC__ - /* The Rhapsody 5.5 (a.k.a. MacOS X) compiler bug - * workaround. */ - s->hit=(i=*(p++))?1:0; -#else +#if 0 s->hit=(*(p++))?1:0; + /* Some [PPC?] compilers fail to increment p in above + statement, e.g. one provided with Rhapsody 5.5, but + most recent example XL C 11.1 for AIX, even without + optimization flag... */ +#else + s->hit=(*p)?1:0; p++; #endif s->s2->tmp.cert_type= *(p++); n2s(p,i); @@ -937,7 +939,7 @@ static int get_server_verify(SSL *s) s->msg_callback(0, s->version, 0, p, len, s, s->msg_callback_arg); /* SERVER-VERIFY */ p += 1; - if (memcmp(p,s->s2->challenge,s->s2->challenge_length) != 0) + if (CRYPTO_memcmp(p,s->s2->challenge,s->s2->challenge_length) != 0) { ssl2_return_error(s,SSL2_PE_UNDEFINED_ERROR); SSLerr(SSL_F_GET_SERVER_VERIFY,SSL_R_CHALLENGE_IS_DIFFERENT); diff --git a/app/openssl/ssl/s2_pkt.c b/app/openssl/ssl/s2_pkt.c index ac963b2d..8bb6ab8b 100644 --- a/app/openssl/ssl/s2_pkt.c +++ b/app/openssl/ssl/s2_pkt.c @@ -269,8 +269,7 @@ static int ssl2_read_internal(SSL *s, void *buf, int len, int peek) s->s2->ract_data_length-=mac_size; ssl2_mac(s,mac,0); s->s2->ract_data_length-=s->s2->padding; - if ( (memcmp(mac,s->s2->mac_data, - (unsigned int)mac_size) != 0) || + if ( (CRYPTO_memcmp(mac,s->s2->mac_data,mac_size) != 0) || (s->s2->rlength%EVP_CIPHER_CTX_block_size(s->enc_read_ctx) != 0)) { SSLerr(SSL_F_SSL2_READ_INTERNAL,SSL_R_BAD_MAC_DECODE); diff --git a/app/openssl/ssl/s2_srvr.c b/app/openssl/ssl/s2_srvr.c index bc885e8e..2cba426b 100644 --- a/app/openssl/ssl/s2_srvr.c +++ b/app/openssl/ssl/s2_srvr.c @@ -1059,10 +1059,12 @@ static int request_certificate(SSL *s) EVP_PKEY *pkey=NULL; EVP_MD_CTX_init(&ctx); - EVP_VerifyInit_ex(&ctx,s->ctx->rsa_md5, NULL); - EVP_VerifyUpdate(&ctx,s->s2->key_material, - s->s2->key_material_length); - EVP_VerifyUpdate(&ctx,ccd,SSL2_MIN_CERT_CHALLENGE_LENGTH); + if (!EVP_VerifyInit_ex(&ctx,s->ctx->rsa_md5, NULL) + || !EVP_VerifyUpdate(&ctx,s->s2->key_material, + s->s2->key_material_length) + || !EVP_VerifyUpdate(&ctx,ccd, + SSL2_MIN_CERT_CHALLENGE_LENGTH)) + goto msg_end; i=i2d_X509(s->cert->pkeys[SSL_PKEY_RSA_ENC].x509,NULL); buf2=OPENSSL_malloc((unsigned int)i); @@ -1073,7 +1075,11 @@ static int request_certificate(SSL *s) } p2=buf2; i=i2d_X509(s->cert->pkeys[SSL_PKEY_RSA_ENC].x509,&p2); - EVP_VerifyUpdate(&ctx,buf2,(unsigned int)i); + if (!EVP_VerifyUpdate(&ctx,buf2,(unsigned int)i)) + { + OPENSSL_free(buf2); + goto msg_end; + } OPENSSL_free(buf2); pkey=X509_get_pubkey(x509); diff --git a/app/openssl/ssl/s3_both.c b/app/openssl/ssl/s3_both.c index 508e3902..d9e18a31 100644 --- a/app/openssl/ssl/s3_both.c +++ b/app/openssl/ssl/s3_both.c @@ -161,6 +161,8 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) i=s->method->ssl3_enc->final_finish_mac(s, sender,slen,s->s3->tmp.finish_md); + if (i == 0) + return 0; s->s3->tmp.finish_md_len = i; memcpy(p, s->s3->tmp.finish_md, i); p+=i; @@ -208,7 +210,11 @@ static void ssl3_take_mac(SSL *s) { const char *sender; int slen; - + /* If no new cipher setup return immediately: other functions will + * set the appropriate error. + */ + if (s->s3->tmp.new_cipher == NULL) + return; if (s->state & SSL_ST_CONNECT) { sender=s->method->ssl3_enc->server_finished_label; @@ -233,7 +239,7 @@ int ssl3_get_finished(SSL *s, int a, int b) #ifdef OPENSSL_NO_NEXTPROTONEG /* the mac has already been generated when we received the - * change cipher spec message and is in s->s3->tmp.peer_finish_md + * change cipher spec message and is in s->s3->tmp.peer_finish_md. */ #endif @@ -265,7 +271,7 @@ int ssl3_get_finished(SSL *s, int a, int b) goto f_err; } - if (memcmp(p, s->s3->tmp.peer_finish_md, i) != 0) + if (CRYPTO_memcmp(p, s->s3->tmp.peer_finish_md, i) != 0) { al=SSL_AD_DECRYPT_ERROR; SSLerr(SSL_F_SSL3_GET_FINISHED,SSL_R_DIGEST_CHECK_FAILED); @@ -555,7 +561,8 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) #endif /* Feed this message into MAC computation. */ - ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4); + if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS) + ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4); if (s->msg_callback) s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg); *ok=1; @@ -756,20 +763,13 @@ int ssl3_setup_read_buffer(SSL *s) if (s->s3->rbuf.buf == NULL) { - if (SSL_get_mode(s) & SSL_MODE_SMALL_BUFFERS) - { - len = SSL3_RT_DEFAULT_PACKET_SIZE; - } - else + len = SSL3_RT_MAX_PLAIN_LENGTH + + SSL3_RT_MAX_ENCRYPTED_OVERHEAD + + headerlen + align; + if (s->options & SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER) { - len = SSL3_RT_MAX_PLAIN_LENGTH - + SSL3_RT_MAX_ENCRYPTED_OVERHEAD - + headerlen + align; - if (s->options & SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER) - { - s->s3->init_extra = 1; - len += SSL3_RT_MAX_EXTRA; - } + s->s3->init_extra = 1; + len += SSL3_RT_MAX_EXTRA; } #ifndef OPENSSL_NO_COMP if (!(s->options & SSL_OP_NO_COMPRESSION)) @@ -805,15 +805,7 @@ int ssl3_setup_write_buffer(SSL *s) if (s->s3->wbuf.buf == NULL) { - if (SSL_get_mode(s) & SSL_MODE_SMALL_BUFFERS) - { - len = SSL3_RT_DEFAULT_PACKET_SIZE; - } - else - { - len = s->max_send_fragment; - } - len += 0 + len = s->max_send_fragment + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD + headerlen + align; #ifndef OPENSSL_NO_COMP @@ -823,6 +815,7 @@ int ssl3_setup_write_buffer(SSL *s) if (!(s->options & SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS)) len += headerlen + align + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD; + if ((p=freelist_extract(s->ctx, 0, len)) == NULL) goto err; s->s3->wbuf.buf = p; @@ -865,3 +858,4 @@ int ssl3_release_read_buffer(SSL *s) } return 1; } + diff --git a/app/openssl/ssl/s3_cbc.c b/app/openssl/ssl/s3_cbc.c new file mode 100644 index 00000000..443a31e7 --- /dev/null +++ b/app/openssl/ssl/s3_cbc.c @@ -0,0 +1,790 @@ +/* ssl/s3_cbc.c */ +/* ==================================================================== + * Copyright (c) 2012 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include "ssl_locl.h" + +#include +#include + +/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length + * field. (SHA-384/512 have 128-bit length.) */ +#define MAX_HASH_BIT_COUNT_BYTES 16 + +/* MAX_HASH_BLOCK_SIZE is the maximum hash block size that we'll support. + * Currently SHA-384/512 has a 128-byte block size and that's the largest + * supported by TLS.) */ +#define MAX_HASH_BLOCK_SIZE 128 + +/* Some utility functions are needed: + * + * These macros return the given value with the MSB copied to all the other + * bits. They use the fact that arithmetic shift shifts-in the sign bit. + * However, this is not ensured by the C standard so you may need to replace + * them with something else on odd CPUs. */ +#define DUPLICATE_MSB_TO_ALL(x) ( (unsigned)( (int)(x) >> (sizeof(int)*8-1) ) ) +#define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x))) + +/* constant_time_lt returns 0xff if a=b and 0x00 otherwise. */ +static unsigned constant_time_ge(unsigned a, unsigned b) + { + a -= b; + return DUPLICATE_MSB_TO_ALL(~a); + } + +/* constant_time_eq_8 returns 0xff if a==b and 0x00 otherwise. */ +static unsigned char constant_time_eq_8(unsigned a, unsigned b) + { + unsigned c = a ^ b; + c--; + return DUPLICATE_MSB_TO_ALL_8(c); + } + +/* ssl3_cbc_remove_padding removes padding from the decrypted, SSLv3, CBC + * record in |rec| by updating |rec->length| in constant time. + * + * block_size: the block size of the cipher used to encrypt the record. + * returns: + * 0: (in non-constant time) if the record is publicly invalid. + * 1: if the padding was valid + * -1: otherwise. */ +int ssl3_cbc_remove_padding(const SSL* s, + SSL3_RECORD *rec, + unsigned block_size, + unsigned mac_size) + { + unsigned padding_length, good; + const unsigned overhead = 1 /* padding length byte */ + mac_size; + + /* These lengths are all public so we can test them in non-constant + * time. */ + if (overhead > rec->length) + return 0; + + padding_length = rec->data[rec->length-1]; + good = constant_time_ge(rec->length, padding_length+overhead); + /* SSLv3 requires that the padding is minimal. */ + good &= constant_time_ge(block_size, padding_length+1); + padding_length = good & (padding_length+1); + rec->length -= padding_length; + rec->type |= padding_length<<8; /* kludge: pass padding length */ + return (int)((good & 1) | (~good & -1)); +} + +/* tls1_cbc_remove_padding removes the CBC padding from the decrypted, TLS, CBC + * record in |rec| in constant time and returns 1 if the padding is valid and + * -1 otherwise. It also removes any explicit IV from the start of the record + * without leaking any timing about whether there was enough space after the + * padding was removed. + * + * block_size: the block size of the cipher used to encrypt the record. + * returns: + * 0: (in non-constant time) if the record is publicly invalid. + * 1: if the padding was valid + * -1: otherwise. */ +int tls1_cbc_remove_padding(const SSL* s, + SSL3_RECORD *rec, + unsigned block_size, + unsigned mac_size) + { + unsigned padding_length, good, to_check, i; + const unsigned overhead = 1 /* padding length byte */ + mac_size; + /* Check if version requires explicit IV */ + if (s->version >= TLS1_1_VERSION || s->version == DTLS1_BAD_VER) + { + /* These lengths are all public so we can test them in + * non-constant time. + */ + if (overhead + block_size > rec->length) + return 0; + /* We can now safely skip explicit IV */ + rec->data += block_size; + rec->input += block_size; + rec->length -= block_size; + } + else if (overhead > rec->length) + return 0; + + padding_length = rec->data[rec->length-1]; + + /* NB: if compression is in operation the first packet may not be of + * even length so the padding bug check cannot be performed. This bug + * workaround has been around since SSLeay so hopefully it is either + * fixed now or no buggy implementation supports compression [steve] + */ + if ( (s->options&SSL_OP_TLS_BLOCK_PADDING_BUG) && !s->expand) + { + /* First packet is even in size, so check */ + if ((memcmp(s->s3->read_sequence, "\0\0\0\0\0\0\0\0",8) == 0) && + !(padding_length & 1)) + { + s->s3->flags|=TLS1_FLAGS_TLS_PADDING_BUG; + } + if ((s->s3->flags & TLS1_FLAGS_TLS_PADDING_BUG) && + padding_length > 0) + { + padding_length--; + } + } + + if (EVP_CIPHER_flags(s->enc_read_ctx->cipher)&EVP_CIPH_FLAG_AEAD_CIPHER) + { + /* padding is already verified */ + rec->length -= padding_length + 1; + return 1; + } + + good = constant_time_ge(rec->length, overhead+padding_length); + /* The padding consists of a length byte at the end of the record and + * then that many bytes of padding, all with the same value as the + * length byte. Thus, with the length byte included, there are i+1 + * bytes of padding. + * + * We can't check just |padding_length+1| bytes because that leaks + * decrypted information. Therefore we always have to check the maximum + * amount of padding possible. (Again, the length of the record is + * public information so we can use it.) */ + to_check = 255; /* maximum amount of padding. */ + if (to_check > rec->length-1) + to_check = rec->length-1; + + for (i = 0; i < to_check; i++) + { + unsigned char mask = constant_time_ge(padding_length, i); + unsigned char b = rec->data[rec->length-1-i]; + /* The final |padding_length+1| bytes should all have the value + * |padding_length|. Therefore the XOR should be zero. */ + good &= ~(mask&(padding_length ^ b)); + } + + /* If any of the final |padding_length+1| bytes had the wrong value, + * one or more of the lower eight bits of |good| will be cleared. We + * AND the bottom 8 bits together and duplicate the result to all the + * bits. */ + good &= good >> 4; + good &= good >> 2; + good &= good >> 1; + good <<= sizeof(good)*8-1; + good = DUPLICATE_MSB_TO_ALL(good); + + padding_length = good & (padding_length+1); + rec->length -= padding_length; + rec->type |= padding_length<<8; /* kludge: pass padding length */ + + return (int)((good & 1) | (~good & -1)); + } + +/* ssl3_cbc_copy_mac copies |md_size| bytes from the end of |rec| to |out| in + * constant time (independent of the concrete value of rec->length, which may + * vary within a 256-byte window). + * + * ssl3_cbc_remove_padding or tls1_cbc_remove_padding must be called prior to + * this function. + * + * On entry: + * rec->orig_len >= md_size + * md_size <= EVP_MAX_MD_SIZE + * + * If CBC_MAC_ROTATE_IN_PLACE is defined then the rotation is performed with + * variable accesses in a 64-byte-aligned buffer. Assuming that this fits into + * a single or pair of cache-lines, then the variable memory accesses don't + * actually affect the timing. CPUs with smaller cache-lines [if any] are + * not multi-core and are not considered vulnerable to cache-timing attacks. + */ +#define CBC_MAC_ROTATE_IN_PLACE + +void ssl3_cbc_copy_mac(unsigned char* out, + const SSL3_RECORD *rec, + unsigned md_size,unsigned orig_len) + { +#if defined(CBC_MAC_ROTATE_IN_PLACE) + unsigned char rotated_mac_buf[64+EVP_MAX_MD_SIZE]; + unsigned char *rotated_mac; +#else + unsigned char rotated_mac[EVP_MAX_MD_SIZE]; +#endif + + /* mac_end is the index of |rec->data| just after the end of the MAC. */ + unsigned mac_end = rec->length; + unsigned mac_start = mac_end - md_size; + /* scan_start contains the number of bytes that we can ignore because + * the MAC's position can only vary by 255 bytes. */ + unsigned scan_start = 0; + unsigned i, j; + unsigned div_spoiler; + unsigned rotate_offset; + + OPENSSL_assert(orig_len >= md_size); + OPENSSL_assert(md_size <= EVP_MAX_MD_SIZE); + +#if defined(CBC_MAC_ROTATE_IN_PLACE) + rotated_mac = rotated_mac_buf + ((0-(size_t)rotated_mac_buf)&63); +#endif + + /* This information is public so it's safe to branch based on it. */ + if (orig_len > md_size + 255 + 1) + scan_start = orig_len - (md_size + 255 + 1); + /* div_spoiler contains a multiple of md_size that is used to cause the + * modulo operation to be constant time. Without this, the time varies + * based on the amount of padding when running on Intel chips at least. + * + * The aim of right-shifting md_size is so that the compiler doesn't + * figure out that it can remove div_spoiler as that would require it + * to prove that md_size is always even, which I hope is beyond it. */ + div_spoiler = md_size >> 1; + div_spoiler <<= (sizeof(div_spoiler)-1)*8; + rotate_offset = (div_spoiler + mac_start - scan_start) % md_size; + + memset(rotated_mac, 0, md_size); + for (i = scan_start, j = 0; i < orig_len; i++) + { + unsigned char mac_started = constant_time_ge(i, mac_start); + unsigned char mac_ended = constant_time_ge(i, mac_end); + unsigned char b = rec->data[i]; + rotated_mac[j++] |= b & mac_started & ~mac_ended; + j &= constant_time_lt(j,md_size); + } + + /* Now rotate the MAC */ +#if defined(CBC_MAC_ROTATE_IN_PLACE) + j = 0; + for (i = 0; i < md_size; i++) + { + /* in case cache-line is 32 bytes, touch second line */ + ((volatile unsigned char *)rotated_mac)[rotate_offset^32]; + out[j++] = rotated_mac[rotate_offset++]; + rotate_offset &= constant_time_lt(rotate_offset,md_size); + } +#else + memset(out, 0, md_size); + rotate_offset = md_size - rotate_offset; + rotate_offset &= constant_time_lt(rotate_offset,md_size); + for (i = 0; i < md_size; i++) + { + for (j = 0; j < md_size; j++) + out[j] |= rotated_mac[i] & constant_time_eq_8(j, rotate_offset); + rotate_offset++; + rotate_offset &= constant_time_lt(rotate_offset,md_size); + } +#endif + } + +/* u32toLE serialises an unsigned, 32-bit number (n) as four bytes at (p) in + * little-endian order. The value of p is advanced by four. */ +#define u32toLE(n, p) \ + (*((p)++)=(unsigned char)(n), \ + *((p)++)=(unsigned char)(n>>8), \ + *((p)++)=(unsigned char)(n>>16), \ + *((p)++)=(unsigned char)(n>>24)) + +/* These functions serialize the state of a hash and thus perform the standard + * "final" operation without adding the padding and length that such a function + * typically does. */ +static void tls1_md5_final_raw(void* ctx, unsigned char *md_out) + { + MD5_CTX *md5 = ctx; + u32toLE(md5->A, md_out); + u32toLE(md5->B, md_out); + u32toLE(md5->C, md_out); + u32toLE(md5->D, md_out); + } + +static void tls1_sha1_final_raw(void* ctx, unsigned char *md_out) + { + SHA_CTX *sha1 = ctx; + l2n(sha1->h0, md_out); + l2n(sha1->h1, md_out); + l2n(sha1->h2, md_out); + l2n(sha1->h3, md_out); + l2n(sha1->h4, md_out); + } +#define LARGEST_DIGEST_CTX SHA_CTX + +#ifndef OPENSSL_NO_SHA256 +static void tls1_sha256_final_raw(void* ctx, unsigned char *md_out) + { + SHA256_CTX *sha256 = ctx; + unsigned i; + + for (i = 0; i < 8; i++) + { + l2n(sha256->h[i], md_out); + } + } +#undef LARGEST_DIGEST_CTX +#define LARGEST_DIGEST_CTX SHA256_CTX +#endif + +#ifndef OPENSSL_NO_SHA512 +static void tls1_sha512_final_raw(void* ctx, unsigned char *md_out) + { + SHA512_CTX *sha512 = ctx; + unsigned i; + + for (i = 0; i < 8; i++) + { + l2n8(sha512->h[i], md_out); + } + } +#undef LARGEST_DIGEST_CTX +#define LARGEST_DIGEST_CTX SHA512_CTX +#endif + +/* ssl3_cbc_record_digest_supported returns 1 iff |ctx| uses a hash function + * which ssl3_cbc_digest_record supports. */ +char ssl3_cbc_record_digest_supported(const EVP_MD_CTX *ctx) + { +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return 0; +#endif + switch (EVP_MD_CTX_type(ctx)) + { + case NID_md5: + case NID_sha1: +#ifndef OPENSSL_NO_SHA256 + case NID_sha224: + case NID_sha256: +#endif +#ifndef OPENSSL_NO_SHA512 + case NID_sha384: + case NID_sha512: +#endif + return 1; + default: + return 0; + } + } + +/* ssl3_cbc_digest_record computes the MAC of a decrypted, padded SSLv3/TLS + * record. + * + * ctx: the EVP_MD_CTX from which we take the hash function. + * ssl3_cbc_record_digest_supported must return true for this EVP_MD_CTX. + * md_out: the digest output. At most EVP_MAX_MD_SIZE bytes will be written. + * md_out_size: if non-NULL, the number of output bytes is written here. + * header: the 13-byte, TLS record header. + * data: the record data itself, less any preceeding explicit IV. + * data_plus_mac_size: the secret, reported length of the data and MAC + * once the padding has been removed. + * data_plus_mac_plus_padding_size: the public length of the whole + * record, including padding. + * is_sslv3: non-zero if we are to use SSLv3. Otherwise, TLS. + * + * On entry: by virtue of having been through one of the remove_padding + * functions, above, we know that data_plus_mac_size is large enough to contain + * a padding byte and MAC. (If the padding was invalid, it might contain the + * padding too. ) */ +void ssl3_cbc_digest_record( + const EVP_MD_CTX *ctx, + unsigned char* md_out, + size_t* md_out_size, + const unsigned char header[13], + const unsigned char *data, + size_t data_plus_mac_size, + size_t data_plus_mac_plus_padding_size, + const unsigned char *mac_secret, + unsigned mac_secret_length, + char is_sslv3) + { + union { double align; + unsigned char c[sizeof(LARGEST_DIGEST_CTX)]; } md_state; + void (*md_final_raw)(void *ctx, unsigned char *md_out); + void (*md_transform)(void *ctx, const unsigned char *block); + unsigned md_size, md_block_size = 64; + unsigned sslv3_pad_length = 40, header_length, variance_blocks, + len, max_mac_bytes, num_blocks, + num_starting_blocks, k, mac_end_offset, c, index_a, index_b; + unsigned int bits; /* at most 18 bits */ + unsigned char length_bytes[MAX_HASH_BIT_COUNT_BYTES]; + /* hmac_pad is the masked HMAC key. */ + unsigned char hmac_pad[MAX_HASH_BLOCK_SIZE]; + unsigned char first_block[MAX_HASH_BLOCK_SIZE]; + unsigned char mac_out[EVP_MAX_MD_SIZE]; + unsigned i, j, md_out_size_u; + EVP_MD_CTX md_ctx; + /* mdLengthSize is the number of bytes in the length field that terminates + * the hash. */ + unsigned md_length_size = 8; + char length_is_big_endian = 1; + + /* This is a, hopefully redundant, check that allows us to forget about + * many possible overflows later in this function. */ + OPENSSL_assert(data_plus_mac_plus_padding_size < 1024*1024); + + switch (EVP_MD_CTX_type(ctx)) + { + case NID_md5: + MD5_Init((MD5_CTX*)md_state.c); + md_final_raw = tls1_md5_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) MD5_Transform; + md_size = 16; + sslv3_pad_length = 48; + length_is_big_endian = 0; + break; + case NID_sha1: + SHA1_Init((SHA_CTX*)md_state.c); + md_final_raw = tls1_sha1_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) SHA1_Transform; + md_size = 20; + break; +#ifndef OPENSSL_NO_SHA256 + case NID_sha224: + SHA224_Init((SHA256_CTX*)md_state.c); + md_final_raw = tls1_sha256_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) SHA256_Transform; + md_size = 224/8; + break; + case NID_sha256: + SHA256_Init((SHA256_CTX*)md_state.c); + md_final_raw = tls1_sha256_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) SHA256_Transform; + md_size = 32; + break; +#endif +#ifndef OPENSSL_NO_SHA512 + case NID_sha384: + SHA384_Init((SHA512_CTX*)md_state.c); + md_final_raw = tls1_sha512_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) SHA512_Transform; + md_size = 384/8; + md_block_size = 128; + md_length_size = 16; + break; + case NID_sha512: + SHA512_Init((SHA512_CTX*)md_state.c); + md_final_raw = tls1_sha512_final_raw; + md_transform = (void(*)(void *ctx, const unsigned char *block)) SHA512_Transform; + md_size = 64; + md_block_size = 128; + md_length_size = 16; + break; +#endif + default: + /* ssl3_cbc_record_digest_supported should have been + * called first to check that the hash function is + * supported. */ + OPENSSL_assert(0); + if (md_out_size) + *md_out_size = -1; + return; + } + + OPENSSL_assert(md_length_size <= MAX_HASH_BIT_COUNT_BYTES); + OPENSSL_assert(md_block_size <= MAX_HASH_BLOCK_SIZE); + OPENSSL_assert(md_size <= EVP_MAX_MD_SIZE); + + header_length = 13; + if (is_sslv3) + { + header_length = + mac_secret_length + + sslv3_pad_length + + 8 /* sequence number */ + + 1 /* record type */ + + 2 /* record length */; + } + + /* variance_blocks is the number of blocks of the hash that we have to + * calculate in constant time because they could be altered by the + * padding value. + * + * In SSLv3, the padding must be minimal so the end of the plaintext + * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that + * the MAC size varies from 0..20 bytes.) In case the 9 bytes of hash + * termination (0x80 + 64-bit length) don't fit in the final block, we + * say that the final two blocks can vary based on the padding. + * + * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not + * required to be minimal. Therefore we say that the final six blocks + * can vary based on the padding. + * + * Later in the function, if the message is short and there obviously + * cannot be this many blocks then variance_blocks can be reduced. */ + variance_blocks = is_sslv3 ? 2 : 6; + /* From now on we're dealing with the MAC, which conceptually has 13 + * bytes of `header' before the start of the data (TLS) or 71/75 bytes + * (SSLv3) */ + len = data_plus_mac_plus_padding_size + header_length; + /* max_mac_bytes contains the maximum bytes of bytes in the MAC, including + * |header|, assuming that there's no padding. */ + max_mac_bytes = len - md_size - 1; + /* num_blocks is the maximum number of hash blocks. */ + num_blocks = (max_mac_bytes + 1 + md_length_size + md_block_size - 1) / md_block_size; + /* In order to calculate the MAC in constant time we have to handle + * the final blocks specially because the padding value could cause the + * end to appear somewhere in the final |variance_blocks| blocks and we + * can't leak where. However, |num_starting_blocks| worth of data can + * be hashed right away because no padding value can affect whether + * they are plaintext. */ + num_starting_blocks = 0; + /* k is the starting byte offset into the conceptual header||data where + * we start processing. */ + k = 0; + /* mac_end_offset is the index just past the end of the data to be + * MACed. */ + mac_end_offset = data_plus_mac_size + header_length - md_size; + /* c is the index of the 0x80 byte in the final hash block that + * contains application data. */ + c = mac_end_offset % md_block_size; + /* index_a is the hash block number that contains the 0x80 terminating + * value. */ + index_a = mac_end_offset / md_block_size; + /* index_b is the hash block number that contains the 64-bit hash + * length, in bits. */ + index_b = (mac_end_offset + md_length_size) / md_block_size; + /* bits is the hash-length in bits. It includes the additional hash + * block for the masked HMAC key, or whole of |header| in the case of + * SSLv3. */ + + /* For SSLv3, if we're going to have any starting blocks then we need + * at least two because the header is larger than a single block. */ + if (num_blocks > variance_blocks + (is_sslv3 ? 1 : 0)) + { + num_starting_blocks = num_blocks - variance_blocks; + k = md_block_size*num_starting_blocks; + } + + bits = 8*mac_end_offset; + if (!is_sslv3) + { + /* Compute the initial HMAC block. For SSLv3, the padding and + * secret bytes are included in |header| because they take more + * than a single block. */ + bits += 8*md_block_size; + memset(hmac_pad, 0, md_block_size); + OPENSSL_assert(mac_secret_length <= sizeof(hmac_pad)); + memcpy(hmac_pad, mac_secret, mac_secret_length); + for (i = 0; i < md_block_size; i++) + hmac_pad[i] ^= 0x36; + + md_transform(md_state.c, hmac_pad); + } + + if (length_is_big_endian) + { + memset(length_bytes,0,md_length_size-4); + length_bytes[md_length_size-4] = (unsigned char)(bits>>24); + length_bytes[md_length_size-3] = (unsigned char)(bits>>16); + length_bytes[md_length_size-2] = (unsigned char)(bits>>8); + length_bytes[md_length_size-1] = (unsigned char)bits; + } + else + { + memset(length_bytes,0,md_length_size); + length_bytes[md_length_size-5] = (unsigned char)(bits>>24); + length_bytes[md_length_size-6] = (unsigned char)(bits>>16); + length_bytes[md_length_size-7] = (unsigned char)(bits>>8); + length_bytes[md_length_size-8] = (unsigned char)bits; + } + + if (k > 0) + { + if (is_sslv3) + { + /* The SSLv3 header is larger than a single block. + * overhang is the number of bytes beyond a single + * block that the header consumes: either 7 bytes + * (SHA1) or 11 bytes (MD5). */ + unsigned overhang = header_length-md_block_size; + md_transform(md_state.c, header); + memcpy(first_block, header + md_block_size, overhang); + memcpy(first_block + overhang, data, md_block_size-overhang); + md_transform(md_state.c, first_block); + for (i = 1; i < k/md_block_size - 1; i++) + md_transform(md_state.c, data + md_block_size*i - overhang); + } + else + { + /* k is a multiple of md_block_size. */ + memcpy(first_block, header, 13); + memcpy(first_block+13, data, md_block_size-13); + md_transform(md_state.c, first_block); + for (i = 1; i < k/md_block_size; i++) + md_transform(md_state.c, data + md_block_size*i - 13); + } + } + + memset(mac_out, 0, sizeof(mac_out)); + + /* We now process the final hash blocks. For each block, we construct + * it in constant time. If the |i==index_a| then we'll include the 0x80 + * bytes and zero pad etc. For each block we selectively copy it, in + * constant time, to |mac_out|. */ + for (i = num_starting_blocks; i <= num_starting_blocks+variance_blocks; i++) + { + unsigned char block[MAX_HASH_BLOCK_SIZE]; + unsigned char is_block_a = constant_time_eq_8(i, index_a); + unsigned char is_block_b = constant_time_eq_8(i, index_b); + for (j = 0; j < md_block_size; j++) + { + unsigned char b = 0, is_past_c, is_past_cp1; + if (k < header_length) + b = header[k]; + else if (k < data_plus_mac_plus_padding_size + header_length) + b = data[k-header_length]; + k++; + + is_past_c = is_block_a & constant_time_ge(j, c); + is_past_cp1 = is_block_a & constant_time_ge(j, c+1); + /* If this is the block containing the end of the + * application data, and we are at the offset for the + * 0x80 value, then overwrite b with 0x80. */ + b = (b&~is_past_c) | (0x80&is_past_c); + /* If this the the block containing the end of the + * application data and we're past the 0x80 value then + * just write zero. */ + b = b&~is_past_cp1; + /* If this is index_b (the final block), but not + * index_a (the end of the data), then the 64-bit + * length didn't fit into index_a and we're having to + * add an extra block of zeros. */ + b &= ~is_block_b | is_block_a; + + /* The final bytes of one of the blocks contains the + * length. */ + if (j >= md_block_size - md_length_size) + { + /* If this is index_b, write a length byte. */ + b = (b&~is_block_b) | (is_block_b&length_bytes[j-(md_block_size-md_length_size)]); + } + block[j] = b; + } + + md_transform(md_state.c, block); + md_final_raw(md_state.c, block); + /* If this is index_b, copy the hash value to |mac_out|. */ + for (j = 0; j < md_size; j++) + mac_out[j] |= block[j]&is_block_b; + } + + EVP_MD_CTX_init(&md_ctx); + EVP_DigestInit_ex(&md_ctx, ctx->digest, NULL /* engine */); + if (is_sslv3) + { + /* We repurpose |hmac_pad| to contain the SSLv3 pad2 block. */ + memset(hmac_pad, 0x5c, sslv3_pad_length); + + EVP_DigestUpdate(&md_ctx, mac_secret, mac_secret_length); + EVP_DigestUpdate(&md_ctx, hmac_pad, sslv3_pad_length); + EVP_DigestUpdate(&md_ctx, mac_out, md_size); + } + else + { + /* Complete the HMAC in the standard manner. */ + for (i = 0; i < md_block_size; i++) + hmac_pad[i] ^= 0x6a; + + EVP_DigestUpdate(&md_ctx, hmac_pad, md_block_size); + EVP_DigestUpdate(&md_ctx, mac_out, md_size); + } + EVP_DigestFinal(&md_ctx, md_out, &md_out_size_u); + if (md_out_size) + *md_out_size = md_out_size_u; + EVP_MD_CTX_cleanup(&md_ctx); + } + +#ifdef OPENSSL_FIPS + +/* Due to the need to use EVP in FIPS mode we can't reimplement digests but + * we can ensure the number of blocks processed is equal for all cases + * by digesting additional data. + */ + +void tls_fips_digest_extra( + const EVP_CIPHER_CTX *cipher_ctx, EVP_MD_CTX *mac_ctx, + const unsigned char *data, size_t data_len, size_t orig_len) + { + size_t block_size, digest_pad, blocks_data, blocks_orig; + if (EVP_CIPHER_CTX_mode(cipher_ctx) != EVP_CIPH_CBC_MODE) + return; + block_size = EVP_MD_CTX_block_size(mac_ctx); + /* We are in FIPS mode if we get this far so we know we have only SHA* + * digests and TLS to deal with. + * Minimum digest padding length is 17 for SHA384/SHA512 and 9 + * otherwise. + * Additional header is 13 bytes. To get the number of digest blocks + * processed round up the amount of data plus padding to the nearest + * block length. Block length is 128 for SHA384/SHA512 and 64 otherwise. + * So we have: + * blocks = (payload_len + digest_pad + 13 + block_size - 1)/block_size + * equivalently: + * blocks = (payload_len + digest_pad + 12)/block_size + 1 + * HMAC adds a constant overhead. + * We're ultimately only interested in differences so this becomes + * blocks = (payload_len + 29)/128 + * for SHA384/SHA512 and + * blocks = (payload_len + 21)/64 + * otherwise. + */ + digest_pad = block_size == 64 ? 21 : 29; + blocks_orig = (orig_len + digest_pad)/block_size; + blocks_data = (data_len + digest_pad)/block_size; + /* MAC enough blocks to make up the difference between the original + * and actual lengths plus one extra block to ensure this is never a + * no op. The "data" pointer should always have enough space to + * perform this operation as it is large enough for a maximum + * length TLS buffer. + */ + EVP_DigestSignUpdate(mac_ctx, data, + (blocks_orig - blocks_data + 1) * block_size); + } +#endif diff --git a/app/openssl/ssl/s3_clnt.c b/app/openssl/ssl/s3_clnt.c index 42bcd629..5e15b75c 100644 --- a/app/openssl/ssl/s3_clnt.c +++ b/app/openssl/ssl/s3_clnt.c @@ -156,6 +156,9 @@ #include #include #include +#ifdef OPENSSL_FIPS +#include +#endif #ifndef OPENSSL_NO_DH #include #endif @@ -199,19 +202,37 @@ int ssl3_connect(SSL *s) s->in_handshake++; if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s); -#if 0 /* Send app data in separate packet, otherwise, some particular site - * (only one site so far) closes the socket. - * Note: there is a very small chance that two TCP packets - * could be arriving at server combined into a single TCP packet, - * then trigger that site to break. We haven't encounter that though. + +#ifndef OPENSSL_NO_HEARTBEATS + /* If we're awaiting a HeartbeatResponse, pretend we + * already got and don't await it anymore, because + * Heartbeats don't make sense during handshakes anyway. */ + if (s->tlsext_hb_pending) + { + s->tlsext_hb_pending = 0; + s->tlsext_hb_seq++; + } +#endif + +// BEGIN android-added +#if 0 +/* Send app data in separate packet, otherwise, some particular site + * (only one site so far) closes the socket. http://b/2511073 + * Note: there is a very small chance that two TCP packets + * could be arriving at server combined into a single TCP packet, + * then trigger that site to break. We haven't encounter that though. + */ +// END android-added if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) { /* Send app data along with CCS/Finished */ s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED; } -#endif +// BEGIN android-added +#endif +// END android-added for (;;) { state=s->state; @@ -219,7 +240,7 @@ int ssl3_connect(SSL *s) switch(s->state) { case SSL_ST_RENEGOTIATE: - s->new_session=1; + s->renegotiate=1; s->state=SSL_ST_CONNECT; s->ctx->stats.sess_connect_renegotiate++; /* break */ @@ -292,7 +313,16 @@ int ssl3_connect(SSL *s) if (ret <= 0) goto end; if (s->hit) + { s->state=SSL3_ST_CR_FINISHED_A; +#ifndef OPENSSL_NO_TLSEXT + if (s->tlsext_ticket_expected) + { + /* receive renewed session ticket */ + s->state=SSL3_ST_CR_SESSION_TICKET_A; + } +#endif + } else s->state=SSL3_ST_CR_CERT_A; s->init_num=0; @@ -315,9 +345,10 @@ int ssl3_connect(SSL *s) } #endif /* Check if it is anon DH/ECDH */ - /* or PSK */ + /* or non-RSA PSK */ if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) && - !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) + !((s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK) && + !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA))) { ret=ssl3_get_server_certificate(s); if (ret <= 0) goto end; @@ -370,6 +401,17 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_SRVR_DONE_B: ret=ssl3_get_server_done(s); if (ret <= 0) goto end; +#ifndef OPENSSL_NO_SRP + if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kSRP) + { + if ((ret = SRP_Calc_A_param(s))<=0) + { + SSLerr(SSL_F_SSL3_CONNECT,SSL_R_SRP_A_CALC); + ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_INTERNAL_ERROR); + goto end; + } + } +#endif if (s->s3->tmp.cert_req) s->state=SSL3_ST_CW_CERT_A; else @@ -436,15 +478,15 @@ int ssl3_connect(SSL *s) SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B); if (ret <= 0) goto end; -#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG) s->state=SSL3_ST_CW_FINISHED_A; -#else - if (s->next_proto_negotiated) +#if !defined(OPENSSL_NO_TLSEXT) + if (s->s3->tlsext_channel_id_valid) + s->state=SSL3_ST_CW_CHANNEL_ID_A; +# if !defined(OPENSSL_NO_NEXTPROTONEG) + if (s->s3->next_proto_neg_seen) s->state=SSL3_ST_CW_NEXT_PROTO_A; - else - s->state=SSL3_ST_CW_FINISHED_A; +# endif #endif - s->init_num=0; s->session->cipher=s->s3->tmp.new_cipher; @@ -477,6 +519,18 @@ int ssl3_connect(SSL *s) case SSL3_ST_CW_NEXT_PROTO_B: ret=ssl3_send_next_proto(s); if (ret <= 0) goto end; + if (s->s3->tlsext_channel_id_valid) + s->state=SSL3_ST_CW_CHANNEL_ID_A; + else + s->state=SSL3_ST_CW_FINISHED_A; + break; +#endif + +#if !defined(OPENSSL_NO_TLSEXT) + case SSL3_ST_CW_CHANNEL_ID_A: + case SSL3_ST_CW_CHANNEL_ID_B: + ret=ssl3_send_channel_id(s); + if (ret <= 0) goto end; s->state=SSL3_ST_CW_FINISHED_A; break; #endif @@ -553,7 +607,7 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_FINISHED_A: case SSL3_ST_CR_FINISHED_B: - + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A, SSL3_ST_CR_FINISHED_B); if (ret <= 0) goto end; @@ -611,6 +665,7 @@ int ssl3_connect(SSL *s) /* else do it later in ssl3_write */ s->init_num=0; + s->renegotiate=0; s->new_session=0; ssl_update_cache(s,SSL_SESS_CACHE_CLIENT); @@ -667,7 +722,7 @@ int ssl3_client_hello(SSL *s) unsigned char *buf; unsigned char *p,*d; int i; - unsigned long Time,l; + unsigned long l; #ifndef OPENSSL_NO_COMP int j; SSL_COMP *comp; @@ -698,17 +753,50 @@ int ssl3_client_hello(SSL *s) /* else use the pre-loaded session */ p=s->s3->client_random; - Time=(unsigned long)time(NULL); /* Time */ - l2n(Time,p); - if (RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-4) <= 0) + + if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0) goto err; /* Do the message type and length last */ d=p= &(buf[4]); + /* version indicates the negotiated version: for example from + * an SSLv2/v3 compatible client hello). The client_version + * field is the maximum version we permit and it is also + * used in RSA encrypted premaster secrets. Some servers can + * choke if we initially report a higher version then + * renegotiate to a lower one in the premaster secret. This + * didn't happen with TLS 1.0 as most servers supported it + * but it can with TLS 1.1 or later if the server only supports + * 1.0. + * + * Possible scenario with previous logic: + * 1. Client hello indicates TLS 1.2 + * 2. Server hello says TLS 1.0 + * 3. RSA encrypted premaster secret uses 1.2. + * 4. Handhaked proceeds using TLS 1.0. + * 5. Server sends hello request to renegotiate. + * 6. Client hello indicates TLS v1.0 as we now + * know that is maximum server supports. + * 7. Server chokes on RSA encrypted premaster secret + * containing version 1.0. + * + * For interoperability it should be OK to always use the + * maximum version we support in client hello and then rely + * on the checking of version to ensure the servers isn't + * being inconsistent: for example initially negotiating with + * TLS 1.0 and renegotiating with TLS 1.2. We do this by using + * client_version in client hello and not resetting it to + * the negotiated version. + */ +#if 0 *(p++)=s->version>>8; *(p++)=s->version&0xff; s->client_version=s->version; +#else + *(p++)=s->client_version>>8; + *(p++)=s->client_version&0xff; +#endif /* Random stuff */ memcpy(p,s->s3->client_random,SSL3_RANDOM_SIZE); @@ -738,6 +826,15 @@ int ssl3_client_hello(SSL *s) SSLerr(SSL_F_SSL3_CLIENT_HELLO,SSL_R_NO_CIPHERS_AVAILABLE); goto err; } +#ifdef OPENSSL_MAX_TLS1_2_CIPHER_LENGTH + /* Some servers hang if client hello > 256 bytes + * as hack workaround chop number of supported ciphers + * to keep it well below this if we use TLS v1.2 + */ + if (TLS1_get_version(s) >= TLS1_2_VERSION + && i > OPENSSL_MAX_TLS1_2_CIPHER_LENGTH) + i = OPENSSL_MAX_TLS1_2_CIPHER_LENGTH & ~1; +#endif s2n(i,p); p+=i; @@ -891,6 +988,7 @@ int ssl3_get_server_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT); goto f_err; } + s->s3->flags |= SSL3_FLAGS_CCS_OK; s->hit=1; } else /* a miss or crap from the other end */ @@ -924,6 +1022,14 @@ int ssl3_get_server_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_UNKNOWN_CIPHER_RETURNED); goto f_err; } + /* TLS v1.2 only ciphersuites require v1.2 or later */ + if ((c->algorithm_ssl & SSL_TLSV1_2) && + (TLS1_get_version(s) < TLS1_2_VERSION)) + { + al=SSL_AD_ILLEGAL_PARAMETER; + SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_WRONG_CIPHER_RETURNED); + goto f_err; + } p+=ssl_put_cipher_by_char(s,NULL,NULL); sk=ssl_get_ciphers_by_id(s); @@ -955,9 +1061,14 @@ int ssl3_get_server_hello(SSL *s) } } s->s3->tmp.new_cipher=c; - if (!ssl3_digest_cached_records(s)) + /* Don't digest cached records if TLS v1.2: we may need them for + * client authentication. + */ + if (TLS1_get_version(s) < TLS1_2_VERSION && !ssl3_digest_cached_records(s)) + { + al = SSL_AD_INTERNAL_ERROR; goto f_err; - + } /* lets get the compression algorithm */ /* COMPRESSION */ #ifdef OPENSSL_NO_COMP @@ -1030,7 +1141,7 @@ int ssl3_get_server_hello(SSL *s) /* wrong packet length */ al=SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_BAD_PACKET_LENGTH); - goto err; + goto f_err; } return(1); @@ -1236,6 +1347,7 @@ int ssl3_get_key_exchange(SSL *s) int al,i,j,param_len,ok; long n,alg_k,alg_a; EVP_PKEY *pkey=NULL; + const EVP_MD *md = NULL; #ifndef OPENSSL_NO_RSA RSA *rsa=NULL; #endif @@ -1267,12 +1379,14 @@ int ssl3_get_key_exchange(SSL *s) omitted if no identity hint is sent. Set session->sess_cert anyway to avoid problems later.*/ - if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK) + if (s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK) { s->session->sess_cert=ssl_sess_cert_new(); - if (s->ctx->psk_identity_hint) - OPENSSL_free(s->ctx->psk_identity_hint); - s->ctx->psk_identity_hint = NULL; + if (s->session->psk_identity_hint) + { + OPENSSL_free(s->session->psk_identity_hint); + s->session->psk_identity_hint = NULL; + } } #endif s->s3->tmp.reuse_message=1; @@ -1315,52 +1429,137 @@ int ssl3_get_key_exchange(SSL *s) EVP_MD_CTX_init(&md_ctx); #ifndef OPENSSL_NO_PSK - if (alg_k & SSL_kPSK) + if (alg_a & SSL_aPSK) { char tmp_id_hint[PSK_MAX_IDENTITY_LEN+1]; al=SSL_AD_HANDSHAKE_FAILURE; n2s(p,i); param_len=i+2; - /* Store PSK identity hint for later use, hint is used - * in ssl3_send_client_key_exchange. Assume that the - * maximum length of a PSK identity hint can be as - * long as the maximum length of a PSK identity. */ - if (i > PSK_MAX_IDENTITY_LEN) - { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, - SSL_R_DATA_LENGTH_TOO_LONG); + if (s->session->psk_identity_hint) + { + OPENSSL_free(s->session->psk_identity_hint); + s->session->psk_identity_hint = NULL; + } + if (i != 0) + { + /* Store PSK identity hint for later use, hint is used + * in ssl3_send_client_key_exchange. Assume that the + * maximum length of a PSK identity hint can be as + * long as the maximum length of a PSK identity. */ + if (i > PSK_MAX_IDENTITY_LEN) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, + SSL_R_DATA_LENGTH_TOO_LONG); + goto f_err; + } + if (param_len > n) + { + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, + SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH); + goto f_err; + } + /* If received PSK identity hint contains NULL + * characters, the hint is truncated from the first + * NULL. p may not be ending with NULL, so create a + * NULL-terminated string. */ + memcpy(tmp_id_hint, p, i); + memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i); + s->session->psk_identity_hint = BUF_strdup(tmp_id_hint); + if (s->session->psk_identity_hint == NULL) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE); + goto f_err; + } + } + p+=i; + n-=param_len; + } +#endif /* !OPENSSL_NO_PSK */ + + if (0) {} +#ifndef OPENSSL_NO_SRP + else if (alg_k & SSL_kSRP) + { + n2s(p,i); + param_len=i+2; + if (param_len > n) + { + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_BAD_SRP_N_LENGTH); goto f_err; } + if (!(s->srp_ctx.N=BN_bin2bn(p,i,NULL))) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_BN_LIB); + goto err; + } + p+=i; + + n2s(p,i); + param_len+=i+2; if (param_len > n) { al=SSL_AD_DECODE_ERROR; - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, - SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH); + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_BAD_SRP_G_LENGTH); goto f_err; } - /* If received PSK identity hint contains NULL - * characters, the hint is truncated from the first - * NULL. p may not be ending with NULL, so create a - * NULL-terminated string. */ - memcpy(tmp_id_hint, p, i); - memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i); - if (s->ctx->psk_identity_hint != NULL) - OPENSSL_free(s->ctx->psk_identity_hint); - s->ctx->psk_identity_hint = BUF_strdup(tmp_id_hint); - if (s->ctx->psk_identity_hint == NULL) - { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE); + if (!(s->srp_ctx.g=BN_bin2bn(p,i,NULL))) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_BN_LIB); + goto err; + } + p+=i; + + i = (unsigned int)(p[0]); + p++; + param_len+=i+1; + if (param_len > n) + { + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_BAD_SRP_S_LENGTH); goto f_err; - } + } + if (!(s->srp_ctx.s=BN_bin2bn(p,i,NULL))) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_BN_LIB); + goto err; + } + p+=i; + n2s(p,i); + param_len+=i+2; + if (param_len > n) + { + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_BAD_SRP_B_LENGTH); + goto f_err; + } + if (!(s->srp_ctx.B=BN_bin2bn(p,i,NULL))) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_BN_LIB); + goto err; + } p+=i; n-=param_len; + +/* We must check if there is a certificate */ +#ifndef OPENSSL_NO_RSA + if (alg_a & SSL_aRSA) + pkey=X509_get_pubkey(s->session->sess_cert->peer_pkeys[SSL_PKEY_RSA_ENC].x509); +#else + if (0) + ; +#endif +#ifndef OPENSSL_NO_DSA + else if (alg_a & SSL_aDSS) + pkey=X509_get_pubkey(s->session->sess_cert->peer_pkeys[SSL_PKEY_DSA_SIGN].x509); +#endif } - else -#endif /* !OPENSSL_NO_PSK */ +#endif /* !OPENSSL_NO_SRP */ #ifndef OPENSSL_NO_RSA - if (alg_k & SSL_kRSA) + else if (alg_k & SSL_kRSA) { if ((rsa=RSA_new()) == NULL) { @@ -1409,9 +1608,6 @@ int ssl3_get_key_exchange(SSL *s) s->session->sess_cert->peer_rsa_tmp=rsa; rsa=NULL; } -#else /* OPENSSL_NO_RSA */ - if (0) - ; #endif #ifndef OPENSSL_NO_DH else if (alg_k & SSL_kEDH) @@ -1592,20 +1788,52 @@ int ssl3_get_key_exchange(SSL *s) EC_POINT_free(srvr_ecpoint); srvr_ecpoint = NULL; } - else if (alg_k) +#endif /* !OPENSSL_NO_ECDH */ + + else if (!(alg_k & SSL_kPSK)) { al=SSL_AD_UNEXPECTED_MESSAGE; SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE); goto f_err; } -#endif /* !OPENSSL_NO_ECDH */ - /* p points to the next byte, there are 'n' bytes left */ /* if it was signed, check the signature */ if (pkey != NULL) { + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + int sigalg = tls12_get_sigid(pkey); + /* Should never happen */ + if (sigalg == -1) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); + goto err; + } + /* Check key type is consistent with signature */ + if (sigalg != (int)p[1]) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_WRONG_SIGNATURE_TYPE); + al=SSL_AD_DECODE_ERROR; + goto f_err; + } + md = tls12_get_hash(p[0]); + if (md == NULL) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNKNOWN_DIGEST); + al=SSL_AD_DECODE_ERROR; + goto f_err; + } +#ifdef SSL_DEBUG +fprintf(stderr, "USING TLSv1.2 HASH %s\n", EVP_MD_name(md)); +#endif + p += 2; + n -= 2; + } + else + md = EVP_sha1(); + n2s(p,i); n-=2; j=EVP_PKEY_size(pkey); @@ -1619,7 +1847,7 @@ int ssl3_get_key_exchange(SSL *s) } #ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA) + if (pkey->type == EVP_PKEY_RSA && TLS1_get_version(s) < TLS1_2_VERSION) { int num; @@ -1627,6 +1855,8 @@ int ssl3_get_key_exchange(SSL *s) q=md_buf; for (num=2; num > 0; num--) { + EVP_MD_CTX_set_flags(&md_ctx, + EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); EVP_DigestInit_ex(&md_ctx,(num == 2) ?s->ctx->md5:s->ctx->sha1, NULL); EVP_DigestUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); @@ -1654,11 +1884,8 @@ int ssl3_get_key_exchange(SSL *s) } else #endif -#ifndef OPENSSL_NO_DSA - if (pkey->type == EVP_PKEY_DSA) { - /* lets do DSS */ - EVP_VerifyInit_ex(&md_ctx,EVP_dss1(), NULL); + EVP_VerifyInit_ex(&md_ctx, md, NULL); EVP_VerifyUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); EVP_VerifyUpdate(&md_ctx,&(s->s3->server_random[0]),SSL3_RANDOM_SIZE); EVP_VerifyUpdate(&md_ctx,param,param_len); @@ -1670,35 +1897,12 @@ int ssl3_get_key_exchange(SSL *s) goto f_err; } } - else -#endif -#ifndef OPENSSL_NO_ECDSA - if (pkey->type == EVP_PKEY_EC) - { - /* let's do ECDSA */ - EVP_VerifyInit_ex(&md_ctx,EVP_ecdsa(), NULL); - EVP_VerifyUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); - EVP_VerifyUpdate(&md_ctx,&(s->s3->server_random[0]),SSL3_RANDOM_SIZE); - EVP_VerifyUpdate(&md_ctx,param,param_len); - if (EVP_VerifyFinal(&md_ctx,p,(int)n,pkey) <= 0) - { - /* bad signature */ - al=SSL_AD_DECRYPT_ERROR; - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_BAD_SIGNATURE); - goto f_err; - } - } - else -#endif - { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); - goto err; - } } else { - if (!(alg_a & SSL_aNULL) && !(alg_k & SSL_kPSK)) - /* aNULL or kPSK do not need public keys */ + if (!(alg_a & SSL_aNULL) && + /* Among PSK ciphers only RSA_PSK needs a public key */ + !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA))) { SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); goto err; @@ -1740,7 +1944,7 @@ int ssl3_get_certificate_request(SSL *s) { int ok,ret=0; unsigned long n,nc,l; - unsigned int llen,ctype_num,i; + unsigned int llen, ctype_num,i; X509_NAME *xn=NULL; const unsigned char *p,*q; unsigned char *d; @@ -1760,6 +1964,14 @@ int ssl3_get_certificate_request(SSL *s) if (s->s3->tmp.message_type == SSL3_MT_SERVER_DONE) { s->s3->tmp.reuse_message=1; + /* If we get here we don't need any cached handshake records + * as we wont be doing client auth. + */ + if (s->s3->handshake_buffer) + { + if (!ssl3_digest_cached_records(s)) + goto err; + } return(1); } @@ -1796,6 +2008,27 @@ int ssl3_get_certificate_request(SSL *s) for (i=0; is3->tmp.ctype[i]= p[i]; p+=ctype_num; + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + n2s(p, llen); + /* Check we have enough room for signature algorithms and + * following length value. + */ + if ((unsigned long)(p - d + llen + 2) > n) + { + ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_DECODE_ERROR); + SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST,SSL_R_DATA_LENGTH_TOO_LONG); + goto err; + } + if (llen & 1) + { + ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_DECODE_ERROR); + SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST,SSL_R_SIGNATURE_ALGORITHMS_ERROR); + goto err; + } + tls1_process_sigalgs(s, p, llen); + p += llen; + } /* get the CA RDNs */ n2s(p,llen); @@ -1808,7 +2041,7 @@ fclose(out); } #endif - if ((llen+ctype_num+2+1) != n) + if ((unsigned long)(p - d + llen) != n) { ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_DECODE_ERROR); SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST,SSL_R_LENGTH_MISMATCH); @@ -1914,7 +2147,7 @@ int ssl3_get_new_session_ticket(SSL *s) if (n < 6) { /* need at least ticket_lifetime_hint + ticket length */ - al = SSL3_AL_FATAL,SSL_AD_DECODE_ERROR; + al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_NEW_SESSION_TICKET,SSL_R_LENGTH_MISMATCH); goto f_err; } @@ -1925,7 +2158,7 @@ int ssl3_get_new_session_ticket(SSL *s) /* ticket_lifetime_hint + ticket_length + ticket */ if (ticklen + 6 != n) { - al = SSL3_AL_FATAL,SSL_AD_DECODE_ERROR; + al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_NEW_SESSION_TICKET,SSL_R_LENGTH_MISMATCH); goto f_err; } @@ -2071,6 +2304,7 @@ int ssl3_send_client_key_exchange(SSL *s) unsigned char *p,*d; int n; unsigned long alg_k; + unsigned long alg_a; #ifndef OPENSSL_NO_RSA unsigned char *q; EVP_PKEY *pkey=NULL; @@ -2085,7 +2319,11 @@ int ssl3_send_client_key_exchange(SSL *s) unsigned char *encodedPoint = NULL; int encoded_pt_len = 0; BN_CTX * bn_ctx = NULL; -#endif +#ifndef OPENSSL_NO_PSK + unsigned int psk_len = 0; + unsigned char psk[PSK_MAX_PSK_LEN]; +#endif /* OPENSSL_NO_PSK */ +#endif /* OPENSSL_NO_ECDH */ if (s->state == SSL3_ST_CW_KEY_EXCH_A) { @@ -2093,7 +2331,89 @@ int ssl3_send_client_key_exchange(SSL *s) p= &(d[4]); alg_k=s->s3->tmp.new_cipher->algorithm_mkey; + alg_a=s->s3->tmp.new_cipher->algorithm_auth; +#ifndef OPENSSL_NO_PSK + if (alg_a & SSL_aPSK) + { + char identity[PSK_MAX_IDENTITY_LEN + 1]; + size_t identity_len; + unsigned char *t = NULL; + unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4]; + unsigned int pre_ms_len = 0; + int psk_err = 1; + + n = 0; + if (s->psk_client_callback == NULL) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + SSL_R_PSK_NO_CLIENT_CB); + goto err; + } + + memset(identity, 0, sizeof(identity)); + psk_len = s->psk_client_callback(s, s->session->psk_identity_hint, + identity, sizeof(identity), psk, sizeof(psk)); + if (psk_len > PSK_MAX_PSK_LEN) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto psk_err; + } + else if (psk_len == 0) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + SSL_R_PSK_IDENTITY_NOT_FOUND); + goto psk_err; + } + identity_len = strnlen(identity, sizeof(identity)); + if (identity_len > PSK_MAX_IDENTITY_LEN) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto psk_err; + } + + if (!(alg_k & SSL_kEECDH)) + { + /* Create the shared secret now if we're not using ECDHE-PSK.*/ + pre_ms_len = 2+psk_len+2+psk_len; + t = pre_ms; + s2n(psk_len, t); + memset(t, 0, psk_len); + t+=psk_len; + s2n(psk_len, t); + memcpy(t, psk, psk_len); + + s->session->master_key_length = + s->method->ssl3_enc->generate_master_secret(s, + s->session->master_key, + pre_ms, pre_ms_len); + s2n(identity_len, p); + memcpy(p, identity, identity_len); + n = 2 + identity_len; + } + + if (s->session->psk_identity != NULL) + OPENSSL_free(s->session->psk_identity); + s->session->psk_identity = BUF_strdup(identity); + if (s->session->psk_identity == NULL) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_MALLOC_FAILURE); + goto psk_err; + } + psk_err = 0; + psk_err: + OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN); + OPENSSL_cleanse(pre_ms, sizeof(pre_ms)); + if (psk_err != 0) + { + ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); + goto err; + } + } +#endif /* Fool emacs indentation */ if (0) {} #ifndef OPENSSL_NO_RSA @@ -2354,14 +2674,19 @@ int ssl3_send_client_key_exchange(SSL *s) /* perhaps clean things up a bit EAY EAY EAY EAY*/ } #endif - -#ifndef OPENSSL_NO_ECDH +#ifndef OPENSSL_NO_ECDH else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe)) { const EC_GROUP *srvr_group = NULL; EC_KEY *tkey; int ecdh_clnt_cert = 0; int field_size = 0; +#ifndef OPENSSL_NO_PSK + unsigned char *pre_ms; + unsigned char *t; + unsigned int pre_ms_len; + unsigned int i; +#endif /* Did we send out the client's * ECDH share for use in premaster @@ -2482,15 +2807,41 @@ int ssl3_send_client_key_exchange(SSL *s) goto err; } - /* generate master key from the result */ - s->session->master_key_length = s->method->ssl3_enc \ - -> generate_master_secret(s, - s->session->master_key, - p, n); - +#ifndef OPENSSL_NO_PSK + /* ECDHE PSK ciphersuites from RFC 5489 */ + if ((alg_a & SSL_aPSK) && psk_len != 0) + { + pre_ms_len = 2+psk_len+2+n; + pre_ms = OPENSSL_malloc(pre_ms_len); + if (pre_ms == NULL) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_MALLOC_FAILURE); + goto err; + } + memset(pre_ms, 0, pre_ms_len); + t = pre_ms; + s2n(psk_len, t); + memcpy(t, psk, psk_len); + t += psk_len; + s2n(n, t); + memcpy(t, p, n); + s->session->master_key_length = s->method->ssl3_enc \ + -> generate_master_secret(s, + s->session->master_key, pre_ms, pre_ms_len); + OPENSSL_cleanse(pre_ms, pre_ms_len); + OPENSSL_free(pre_ms); + } +#endif /* OPENSSL_NO_PSK */ + if (!(alg_a & SSL_aPSK)) + { + /* generate master key from the result */ + s->session->master_key_length = s->method->ssl3_enc \ + -> generate_master_secret(s, + s->session->master_key, p, n); + } memset(p, 0, n); /* clean up */ - - if (ecdh_clnt_cert) + if (ecdh_clnt_cert) { /* Send empty client key exch message */ n = 0; @@ -2518,29 +2869,42 @@ int ssl3_send_client_key_exchange(SSL *s) } /* Encode the public key */ - n = EC_POINT_point2oct(srvr_group, - EC_KEY_get0_public_key(clnt_ecdh), - POINT_CONVERSION_UNCOMPRESSED, + encoded_pt_len = EC_POINT_point2oct(srvr_group, + EC_KEY_get0_public_key(clnt_ecdh), + POINT_CONVERSION_UNCOMPRESSED, encodedPoint, encoded_pt_len, bn_ctx); + + n = 0; +#ifndef OPENSSL_NO_PSK + if ((alg_a & SSL_aPSK) && psk_len != 0) + { + i = strlen(s->session->psk_identity); + s2n(i, p); + memcpy(p, s->session->psk_identity, i); + p += i; + n = i + 2; + } +#endif - *p = n; /* length of encoded point */ + *p = encoded_pt_len; /* length of encoded point */ /* Encoded point will be copied here */ - p += 1; + p += 1; + n += 1; /* copy the point */ - memcpy((unsigned char *)p, encodedPoint, n); + memcpy((unsigned char *)p, encodedPoint, encoded_pt_len); /* increment n to account for length field */ - n += 1; + n += encoded_pt_len; } /* Free allocated memory */ BN_CTX_free(bn_ctx); if (encodedPoint != NULL) OPENSSL_free(encodedPoint); - if (clnt_ecdh != NULL) + if (clnt_ecdh != NULL) EC_KEY_free(clnt_ecdh); EVP_PKEY_free(srvr_pub_pkey); } #endif /* !OPENSSL_NO_ECDH */ - else if (alg_k & SSL_kGOST) + else if (alg_k & SSL_kGOST) { /* GOST key exchange message creation */ EVP_PKEY_CTX *pkey_ctx; @@ -2630,89 +2994,40 @@ int ssl3_send_client_key_exchange(SSL *s) EVP_PKEY_free(pub_key); } -#ifndef OPENSSL_NO_PSK - else if (alg_k & SSL_kPSK) +#ifndef OPENSSL_NO_SRP + else if (alg_k & SSL_kSRP) { - char identity[PSK_MAX_IDENTITY_LEN]; - unsigned char *t = NULL; - unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4]; - unsigned int pre_ms_len = 0, psk_len = 0; - int psk_err = 1; - - n = 0; - if (s->psk_client_callback == NULL) + if (s->srp_ctx.A != NULL) { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_NO_CLIENT_CB); - goto err; - } - - psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint, - identity, PSK_MAX_IDENTITY_LEN, - psk_or_pre_ms, sizeof(psk_or_pre_ms)); - if (psk_len > PSK_MAX_PSK_LEN) - { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto psk_err; - } - else if (psk_len == 0) - { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_IDENTITY_NOT_FOUND); - goto psk_err; + /* send off the data */ + n=BN_num_bytes(s->srp_ctx.A); + s2n(n,p); + BN_bn2bin(s->srp_ctx.A,p); + n+=2; } - - /* create PSK pre_master_secret */ - pre_ms_len = 2+psk_len+2+psk_len; - t = psk_or_pre_ms; - memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len); - s2n(psk_len, t); - memset(t, 0, psk_len); - t+=psk_len; - s2n(psk_len, t); - - if (s->session->psk_identity_hint != NULL) - OPENSSL_free(s->session->psk_identity_hint); - s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint); - if (s->ctx->psk_identity_hint != NULL && - s->session->psk_identity_hint == NULL) + else { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); + goto err; } - - if (s->session->psk_identity != NULL) - OPENSSL_free(s->session->psk_identity); - s->session->psk_identity = BUF_strdup(identity); - if (s->session->psk_identity == NULL) + if (s->session->srp_username != NULL) + OPENSSL_free(s->session->srp_username); + s->session->srp_username = BUF_strdup(s->srp_ctx.login); + if (s->session->srp_username == NULL) { SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE); - goto psk_err; + goto err; } - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s->session->master_key, - psk_or_pre_ms, pre_ms_len); - n = strlen(identity); - s2n(n, p); - memcpy(p, identity, n); - n+=2; - psk_err = 0; - psk_err: - OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN); - OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms)); - if (psk_err != 0) + if ((s->session->master_key_length = SRP_generate_client_master_secret(s,s->session->master_key))<0) { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); goto err; } } #endif - else + else if (!(alg_k & SSL_kPSK)) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); @@ -2749,12 +3064,13 @@ int ssl3_send_client_verify(SSL *s) unsigned char data[MD5_DIGEST_LENGTH+SHA_DIGEST_LENGTH]; EVP_PKEY *pkey; EVP_PKEY_CTX *pctx=NULL; -#ifndef OPENSSL_NO_RSA + EVP_MD_CTX mctx; unsigned u=0; -#endif unsigned long n; int j; + EVP_MD_CTX_init(&mctx); + if (s->state == SSL3_ST_CW_CERT_VRFY_A) { d=(unsigned char *)s->init_buf->data; @@ -2765,7 +3081,8 @@ int ssl3_send_client_verify(SSL *s) EVP_PKEY_sign_init(pctx); if (EVP_PKEY_CTX_set_signature_md(pctx, EVP_sha1())>0) { - s->method->ssl3_enc->cert_verify_mac(s, + if (TLS1_get_version(s) < TLS1_2_VERSION) + s->method->ssl3_enc->cert_verify_mac(s, NID_sha1, &(data[MD5_DIGEST_LENGTH])); } @@ -2773,6 +3090,62 @@ int ssl3_send_client_verify(SSL *s) { ERR_clear_error(); } + /* For TLS v1.2 send signature algorithm and signature + * using agreed digest and cached handshake records. + */ + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + long hdatalen = 0; + void *hdata; + const EVP_MD *md; + switch (ssl_cert_type(NULL, pkey)) + { + case SSL_PKEY_RSA_ENC: + md = s->s3->digest_rsa; + break; + case SSL_PKEY_DSA_SIGN: + md = s->s3->digest_dsa; + break; + case SSL_PKEY_ECC: + md = s->s3->digest_ecdsa; + break; + default: + md = NULL; + } + if (!md) + /* Unlike with the SignatureAlgorithm extension (sent by clients), + * there are no default algorithms for the CertificateRequest message + * (sent by servers). However, now that we've sent a certificate + * for which we don't really know what hash to use for signing, the + * best we can do is try a default algorithm. */ + md = EVP_sha1(); + hdatalen = BIO_get_mem_data(s->s3->handshake_buffer, + &hdata); + if (hdatalen <= 0 || !tls12_get_sigandhash(p, pkey, md)) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_VERIFY, + ERR_R_INTERNAL_ERROR); + goto err; + } + p += 2; +#ifdef SSL_DEBUG + fprintf(stderr, "Using TLS 1.2 with client alg %s\n", + EVP_MD_name(md)); +#endif + if (!EVP_SignInit_ex(&mctx, md, NULL) + || !EVP_SignUpdate(&mctx, hdata, hdatalen) + || !EVP_SignFinal(&mctx, p + 2, &u, pkey)) + { + SSLerr(SSL_F_SSL3_SEND_CLIENT_VERIFY, + ERR_R_EVP_LIB); + goto err; + } + s2n(u,p); + n = u + 4; + if (!ssl3_digest_cached_records(s)) + goto err; + } + else #ifndef OPENSSL_NO_RSA if (pkey->type == EVP_PKEY_RSA) { @@ -2855,9 +3228,11 @@ int ssl3_send_client_verify(SSL *s) s->init_num=(int)n+4; s->init_off=0; } + EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_CTX_free(pctx); return(ssl3_do_write(s,SSL3_RT_HANDSHAKE)); err: + EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_CTX_free(pctx); return(-1); } @@ -2957,7 +3332,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) alg_a=s->s3->tmp.new_cipher->algorithm_auth; /* we don't have a certificate */ - if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || (alg_k & SSL_kPSK)) + if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || ((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA))) return(1); sc=s->session->sess_cert; @@ -2981,7 +3356,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) if (idx == SSL_PKEY_ECC) { if (ssl_check_srvr_ecc_cert_and_alg(sc->peer_pkeys[idx].x509, - s->s3->tmp.new_cipher) == 0) + s) == 0) { /* check failed */ SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM,SSL_R_BAD_ECC_CERT); goto f_err; @@ -3077,13 +3452,8 @@ err: return(0); } -/* Check to see if handshake is full or resumed. Usually this is just a - * case of checking to see if a cache hit has occurred. In the case of - * session tickets we have to check the next message to be sure. - */ - -#ifndef OPENSSL_NO_TLSEXT -# ifndef OPENSSL_NO_NEXTPROTONEG +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) int ssl3_send_next_proto(SSL *s) { unsigned int len, padding_len; @@ -3106,9 +3476,124 @@ int ssl3_send_next_proto(SSL *s) } return ssl3_do_write(s, SSL3_RT_HANDSHAKE); +} +# endif /* !OPENSSL_NO_NEXTPROTONEG */ + +int ssl3_send_channel_id(SSL *s) + { + unsigned char *d; + int ret = -1, public_key_len; + EVP_MD_CTX md_ctx; + size_t sig_len; + ECDSA_SIG *sig = NULL; + unsigned char *public_key = NULL, *derp, *der_sig = NULL; + + if (s->state != SSL3_ST_CW_CHANNEL_ID_A) + return ssl3_do_write(s, SSL3_RT_HANDSHAKE); + + d = (unsigned char *)s->init_buf->data; + *(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS; + l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d); + s2n(TLSEXT_TYPE_channel_id, d); + s2n(TLSEXT_CHANNEL_ID_SIZE, d); + + EVP_MD_CTX_init(&md_ctx); + + public_key_len = i2d_PublicKey(s->tlsext_channel_id_private, NULL); + if (public_key_len <= 0) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY); + goto err; + } + // i2d_PublicKey will produce an ANSI X9.62 public key which, for a + // P-256 key, is 0x04 (meaning uncompressed) followed by the x and y + // field elements as 32-byte, big-endian numbers. + if (public_key_len != 65) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256); + goto err; + } + public_key = OPENSSL_malloc(public_key_len); + if (!public_key) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE); + goto err; + } + + derp = public_key; + i2d_PublicKey(s->tlsext_channel_id_private, &derp); + + if (EVP_DigestSignInit(&md_ctx, NULL, EVP_sha256(), NULL, + s->tlsext_channel_id_private) != 1) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNINIT_FAILED); + goto err; + } + + if (!tls1_channel_id_hash(&md_ctx, s)) + goto err; + + if (!EVP_DigestSignFinal(&md_ctx, NULL, &sig_len)) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED); + goto err; + } + + der_sig = OPENSSL_malloc(sig_len); + if (!der_sig) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE); + goto err; + } + + if (!EVP_DigestSignFinal(&md_ctx, der_sig, &sig_len)) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED); + goto err; + } + + derp = der_sig; + sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len); + if (sig == NULL) + { + SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG); + goto err; + } + + // The first byte of public_key will be 0x4, denoting an uncompressed key. + memcpy(d, public_key + 1, 64); + d += 64; + memset(d, 0, 2 * 32); + BN_bn2bin(sig->r, d + 32 - BN_num_bytes(sig->r)); + d += 32; + BN_bn2bin(sig->s, d + 32 - BN_num_bytes(sig->s)); + d += 32; + + s->state = SSL3_ST_CW_CHANNEL_ID_B; + s->init_num = 4 + 2 + 2 + TLSEXT_CHANNEL_ID_SIZE; + s->init_off = 0; + + ret = ssl3_do_write(s, SSL3_RT_HANDSHAKE); + +err: + EVP_MD_CTX_cleanup(&md_ctx); + if (public_key) + OPENSSL_free(public_key); + if (der_sig) + OPENSSL_free(der_sig); + if (sig) + ECDSA_SIG_free(sig); + + return ret; } -# endif +#endif /* !OPENSSL_NO_TLSEXT */ +/* Check to see if handshake is full or resumed. Usually this is just a + * case of checking to see if a cache hit has occurred. In the case of + * session tickets we have to check the next message to be sure. + */ + +#ifndef OPENSSL_NO_TLSEXT int ssl3_check_finished(SSL *s) { int ok; diff --git a/app/openssl/ssl/s3_enc.c b/app/openssl/ssl/s3_enc.c index b1459707..90fbb180 100644 --- a/app/openssl/ssl/s3_enc.c +++ b/app/openssl/ssl/s3_enc.c @@ -170,6 +170,7 @@ static int ssl3_generate_key_block(SSL *s, unsigned char *km, int num) #endif k=0; EVP_MD_CTX_init(&m5); + EVP_MD_CTX_set_flags(&m5, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); EVP_MD_CTX_init(&s1); for (i=0; (int)ioptions & SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS)) + /* enable vulnerability countermeasure for CBC ciphers with + * known-IV problem (http://www.openssl.org/~bodo/tls-cbc.txt) */ + if ((s->mode & SSL_MODE_CBC_RECORD_SPLITTING) != 0) { - /* enable vulnerability countermeasure for CBC ciphers with - * known-IV problem (http://www.openssl.org/~bodo/tls-cbc.txt) - */ - s->s3->need_empty_fragments = 1; + s->s3->need_record_splitting = 1; if (s->session->cipher != NULL) { if (s->session->cipher->algorithm_enc == SSL_eNULL) - s->s3->need_empty_fragments = 0; - + s->s3->need_record_splitting = 0; + #ifndef OPENSSL_NO_RC4 if (s->session->cipher->algorithm_enc == SSL_RC4) - s->s3->need_empty_fragments = 0; + s->s3->need_record_splitting = 0; #endif } } return ret; - + err: SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,ERR_R_MALLOC_FAILURE); return(0); @@ -465,12 +465,21 @@ void ssl3_cleanup_key_block(SSL *s) s->s3->tmp.key_block_length=0; } +/* ssl3_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively. + * + * Returns: + * 0: (in non-constant time) if the record is publically invalid (i.e. too + * short etc). + * 1: if the record's padding is valid / the encryption was successful. + * -1: if the record's padding is invalid or, if sending, an internal error + * occured. + */ int ssl3_enc(SSL *s, int send) { SSL3_RECORD *rec; EVP_CIPHER_CTX *ds; unsigned long l; - int bs,i; + int bs,i,mac_size=0; const EVP_CIPHER *enc; if (send) @@ -521,32 +530,16 @@ int ssl3_enc(SSL *s, int send) if (!send) { if (l == 0 || l%bs != 0) - { - SSLerr(SSL_F_SSL3_ENC,SSL_R_BLOCK_CIPHER_PAD_IS_WRONG); - ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_DECRYPTION_FAILED); return 0; - } /* otherwise, rec->length >= bs */ } EVP_Cipher(ds,rec->data,rec->input,l); + if (EVP_MD_CTX_md(s->read_hash) != NULL) + mac_size = EVP_MD_CTX_size(s->read_hash); if ((bs != 1) && !send) - { - i=rec->data[l-1]+1; - /* SSL 3.0 bounds the number of padding bytes by the block size; - * padding bytes (except the last one) are arbitrary */ - if (i > bs) - { - /* Incorrect padding. SSLerr() and ssl3_alert are done - * by caller: we don't want to reveal whether this is - * a decryption error or a MAC verification failure - * (see http://www.openssl.org/~bodo/tls-cbc.txt) */ - return -1; - } - /* now i <= bs <= rec->length */ - rec->length-=i; - } + return ssl3_cbc_remove_padding(s, rec, bs, mac_size); } return(1); } @@ -571,12 +564,12 @@ void ssl3_free_digest_list(SSL *s) OPENSSL_free(s->s3->handshake_dgst); s->s3->handshake_dgst=NULL; } - + void ssl3_finish_mac(SSL *s, const unsigned char *buf, int len) { - if (s->s3->handshake_buffer) + if (s->s3->handshake_buffer && !(s->s3->flags & TLS1_FLAGS_KEEP_HANDSHAKE)) { BIO_write (s->s3->handshake_buffer,(void *)buf,len); } @@ -613,9 +606,16 @@ int ssl3_digest_cached_records(SSL *s) /* Loop through bitso of algorithm2 field and create MD_CTX-es */ for (i=0;ssl_get_handshake_digest(i,&mask,&md); i++) { - if ((mask & s->s3->tmp.new_cipher->algorithm2) && md) + if ((mask & ssl_get_algorithm2(s)) && md) { s->s3->handshake_dgst[i]=EVP_MD_CTX_create(); +#ifdef OPENSSL_FIPS + if (EVP_MD_nid(md) == NID_md5) + { + EVP_MD_CTX_set_flags(s->s3->handshake_dgst[i], + EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + } +#endif EVP_DigestInit_ex(s->s3->handshake_dgst[i],md,NULL); EVP_DigestUpdate(s->s3->handshake_dgst[i],hdata,hdatalen); } @@ -624,9 +624,12 @@ int ssl3_digest_cached_records(SSL *s) s->s3->handshake_dgst[i]=NULL; } } - /* Free handshake_buffer BIO */ - BIO_free(s->s3->handshake_buffer); - s->s3->handshake_buffer = NULL; + if (!(s->s3->flags & TLS1_FLAGS_KEEP_HANDSHAKE)) + { + /* Free handshake_buffer BIO */ + BIO_free(s->s3->handshake_buffer); + s->s3->handshake_buffer = NULL; + } return 1; } @@ -672,6 +675,7 @@ static int ssl3_handshake_mac(SSL *s, int md_nid, return 0; } EVP_MD_CTX_init(&ctx); + EVP_MD_CTX_set_flags(&ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); EVP_MD_CTX_copy_ex(&ctx,d); n=EVP_MD_CTX_size(&ctx); if (n < 0) @@ -704,7 +708,7 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send) EVP_MD_CTX md_ctx; const EVP_MD_CTX *hash; unsigned char *p,rec_char; - unsigned int md_size; + size_t md_size, orig_len; int npad; int t; @@ -729,28 +733,72 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send) md_size=t; npad=(48/md_size)*md_size; - /* Chop the digest off the end :-) */ - EVP_MD_CTX_init(&md_ctx); - - EVP_MD_CTX_copy_ex( &md_ctx,hash); - EVP_DigestUpdate(&md_ctx,mac_sec,md_size); - EVP_DigestUpdate(&md_ctx,ssl3_pad_1,npad); - EVP_DigestUpdate(&md_ctx,seq,8); - rec_char=rec->type; - EVP_DigestUpdate(&md_ctx,&rec_char,1); - p=md; - s2n(rec->length,p); - EVP_DigestUpdate(&md_ctx,md,2); - EVP_DigestUpdate(&md_ctx,rec->input,rec->length); - EVP_DigestFinal_ex( &md_ctx,md,NULL); - - EVP_MD_CTX_copy_ex( &md_ctx,hash); - EVP_DigestUpdate(&md_ctx,mac_sec,md_size); - EVP_DigestUpdate(&md_ctx,ssl3_pad_2,npad); - EVP_DigestUpdate(&md_ctx,md,md_size); - EVP_DigestFinal_ex( &md_ctx,md,&md_size); - - EVP_MD_CTX_cleanup(&md_ctx); + /* kludge: ssl3_cbc_remove_padding passes padding length in rec->type */ + orig_len = rec->length+md_size+((unsigned int)rec->type>>8); + rec->type &= 0xff; + + if (!send && + EVP_CIPHER_CTX_mode(ssl->enc_read_ctx) == EVP_CIPH_CBC_MODE && + ssl3_cbc_record_digest_supported(hash)) + { + /* This is a CBC-encrypted record. We must avoid leaking any + * timing-side channel information about how many blocks of + * data we are hashing because that gives an attacker a + * timing-oracle. */ + + /* npad is, at most, 48 bytes and that's with MD5: + * 16 + 48 + 8 (sequence bytes) + 1 + 2 = 75. + * + * With SHA-1 (the largest hash speced for SSLv3) the hash size + * goes up 4, but npad goes down by 8, resulting in a smaller + * total size. */ + unsigned char header[75]; + unsigned j = 0; + memcpy(header+j, mac_sec, md_size); + j += md_size; + memcpy(header+j, ssl3_pad_1, npad); + j += npad; + memcpy(header+j, seq, 8); + j += 8; + header[j++] = rec->type; + header[j++] = rec->length >> 8; + header[j++] = rec->length & 0xff; + + ssl3_cbc_digest_record( + hash, + md, &md_size, + header, rec->input, + rec->length + md_size, orig_len, + mac_sec, md_size, + 1 /* is SSLv3 */); + } + else + { + unsigned int md_size_u; + /* Chop the digest off the end :-) */ + EVP_MD_CTX_init(&md_ctx); + + EVP_MD_CTX_copy_ex( &md_ctx,hash); + EVP_DigestUpdate(&md_ctx,mac_sec,md_size); + EVP_DigestUpdate(&md_ctx,ssl3_pad_1,npad); + EVP_DigestUpdate(&md_ctx,seq,8); + rec_char=rec->type; + EVP_DigestUpdate(&md_ctx,&rec_char,1); + p=md; + s2n(rec->length,p); + EVP_DigestUpdate(&md_ctx,md,2); + EVP_DigestUpdate(&md_ctx,rec->input,rec->length); + EVP_DigestFinal_ex( &md_ctx,md,NULL); + + EVP_MD_CTX_copy_ex( &md_ctx,hash); + EVP_DigestUpdate(&md_ctx,mac_sec,md_size); + EVP_DigestUpdate(&md_ctx,ssl3_pad_2,npad); + EVP_DigestUpdate(&md_ctx,md,md_size); + EVP_DigestFinal_ex( &md_ctx,md,&md_size_u); + md_size = md_size_u; + + EVP_MD_CTX_cleanup(&md_ctx); + } ssl3_record_sequence_update(seq); return(md_size); diff --git a/app/openssl/ssl/s3_lib.c b/app/openssl/ssl/s3_lib.c index 28ee4746..f84da7f5 100644 --- a/app/openssl/ssl/s3_lib.c +++ b/app/openssl/ssl/s3_lib.c @@ -1071,6 +1071,103 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 256, }, + /* TLS v1.2 ciphersuites */ + /* Cipher 3B */ + { + 1, + TLS1_TXT_RSA_WITH_NULL_SHA256, + TLS1_CK_RSA_WITH_NULL_SHA256, + SSL_kRSA, + SSL_aRSA, + SSL_eNULL, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 0, + 0, + }, + + /* Cipher 3C */ + { + 1, + TLS1_TXT_RSA_WITH_AES_128_SHA256, + TLS1_CK_RSA_WITH_AES_128_SHA256, + SSL_kRSA, + SSL_aRSA, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher 3D */ + { + 1, + TLS1_TXT_RSA_WITH_AES_256_SHA256, + TLS1_CK_RSA_WITH_AES_256_SHA256, + SSL_kRSA, + SSL_aRSA, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher 3E */ + { + 0, /* not implemented (non-ephemeral DH) */ + TLS1_TXT_DH_DSS_WITH_AES_128_SHA256, + TLS1_CK_DH_DSS_WITH_AES_128_SHA256, + SSL_kDHd, + SSL_aDH, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher 3F */ + { + 0, /* not implemented (non-ephemeral DH) */ + TLS1_TXT_DH_RSA_WITH_AES_128_SHA256, + TLS1_CK_DH_RSA_WITH_AES_128_SHA256, + SSL_kDHr, + SSL_aDH, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher 40 */ + { + 1, + TLS1_TXT_DHE_DSS_WITH_AES_128_SHA256, + TLS1_CK_DHE_DSS_WITH_AES_128_SHA256, + SSL_kEDH, + SSL_aDSS, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + #ifndef OPENSSL_NO_CAMELLIA /* Camellia ciphersuites from RFC4132 (128-bit portion) */ @@ -1287,6 +1384,122 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 128, }, #endif + + /* TLS v1.2 ciphersuites */ + /* Cipher 67 */ + { + 1, + TLS1_TXT_DHE_RSA_WITH_AES_128_SHA256, + TLS1_CK_DHE_RSA_WITH_AES_128_SHA256, + SSL_kEDH, + SSL_aRSA, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher 68 */ + { + 0, /* not implemented (non-ephemeral DH) */ + TLS1_TXT_DH_DSS_WITH_AES_256_SHA256, + TLS1_CK_DH_DSS_WITH_AES_256_SHA256, + SSL_kDHd, + SSL_aDH, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher 69 */ + { + 0, /* not implemented (non-ephemeral DH) */ + TLS1_TXT_DH_RSA_WITH_AES_256_SHA256, + TLS1_CK_DH_RSA_WITH_AES_256_SHA256, + SSL_kDHr, + SSL_aDH, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher 6A */ + { + 1, + TLS1_TXT_DHE_DSS_WITH_AES_256_SHA256, + TLS1_CK_DHE_DSS_WITH_AES_256_SHA256, + SSL_kEDH, + SSL_aDSS, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher 6B */ + { + 1, + TLS1_TXT_DHE_RSA_WITH_AES_256_SHA256, + TLS1_CK_DHE_RSA_WITH_AES_256_SHA256, + SSL_kEDH, + SSL_aRSA, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher 6C */ + { + 1, + TLS1_TXT_ADH_WITH_AES_128_SHA256, + TLS1_CK_ADH_WITH_AES_128_SHA256, + SSL_kEDH, + SSL_aNULL, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher 6D */ + { + 1, + TLS1_TXT_ADH_WITH_AES_256_SHA256, + TLS1_CK_ADH_WITH_AES_256_SHA256, + SSL_kEDH, + SSL_aNULL, + SSL_AES256, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* GOST Ciphersuites */ + { 1, "GOST94-GOST89-GOST89", @@ -1470,7 +1683,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 168, 168, @@ -1486,7 +1699,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_AES128, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 128, 128, @@ -1502,7 +1715,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_AES256, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 256, 256, @@ -1610,130 +1823,324 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ #endif /* OPENSSL_NO_SEED */ -#ifndef OPENSSL_NO_ECDH - /* Cipher C001 */ - { - 1, - TLS1_TXT_ECDH_ECDSA_WITH_NULL_SHA, - TLS1_CK_ECDH_ECDSA_WITH_NULL_SHA, - SSL_kECDHe, - SSL_aECDH, - SSL_eNULL, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_STRONG_NONE, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 0, - 0, - }, + /* GCM ciphersuites from RFC5288 */ - /* Cipher C002 */ + /* Cipher 9C */ { 1, - TLS1_TXT_ECDH_ECDSA_WITH_RC4_128_SHA, - TLS1_CK_ECDH_ECDSA_WITH_RC4_128_SHA, - SSL_kECDHe, - SSL_aECDH, - SSL_RC4, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_MEDIUM, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + TLS1_TXT_RSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_RSA_WITH_AES_128_GCM_SHA256, + SSL_kRSA, + SSL_aRSA, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C003 */ + /* Cipher 9D */ { 1, - TLS1_TXT_ECDH_ECDSA_WITH_DES_192_CBC3_SHA, - TLS1_CK_ECDH_ECDSA_WITH_DES_192_CBC3_SHA, - SSL_kECDHe, - SSL_aECDH, - SSL_3DES, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 168, - 168, + TLS1_TXT_RSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_RSA_WITH_AES_256_GCM_SHA384, + SSL_kRSA, + SSL_aRSA, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, }, - /* Cipher C004 */ + /* Cipher 9E */ { 1, - TLS1_TXT_ECDH_ECDSA_WITH_AES_128_CBC_SHA, - TLS1_CK_ECDH_ECDSA_WITH_AES_128_CBC_SHA, - SSL_kECDHe, - SSL_aECDH, - SSL_AES128, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + TLS1_TXT_DHE_RSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_DHE_RSA_WITH_AES_128_GCM_SHA256, + SSL_kEDH, + SSL_aRSA, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C005 */ + /* Cipher 9F */ { 1, - TLS1_TXT_ECDH_ECDSA_WITH_AES_256_CBC_SHA, - TLS1_CK_ECDH_ECDSA_WITH_AES_256_CBC_SHA, - SSL_kECDHe, - SSL_aECDH, - SSL_AES256, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + TLS1_TXT_DHE_RSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_DHE_RSA_WITH_AES_256_GCM_SHA384, + SSL_kEDH, + SSL_aRSA, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, 256, 256, }, - /* Cipher C006 */ + /* Cipher A0 */ { - 1, - TLS1_TXT_ECDHE_ECDSA_WITH_NULL_SHA, - TLS1_CK_ECDHE_ECDSA_WITH_NULL_SHA, - SSL_kEECDH, - SSL_aECDSA, - SSL_eNULL, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_STRONG_NONE, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 0, - 0, - }, - - /* Cipher C007 */ - { - 1, - TLS1_TXT_ECDHE_ECDSA_WITH_RC4_128_SHA, - TLS1_CK_ECDHE_ECDSA_WITH_RC4_128_SHA, - SSL_kEECDH, - SSL_aECDSA, - SSL_RC4, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_MEDIUM, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + TLS1_TXT_DH_RSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_DH_RSA_WITH_AES_128_GCM_SHA256, + SSL_kDHr, + SSL_aDH, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C008 */ + /* Cipher A1 */ { - 1, - TLS1_TXT_ECDHE_ECDSA_WITH_DES_192_CBC3_SHA, - TLS1_CK_ECDHE_ECDSA_WITH_DES_192_CBC3_SHA, + 0, + TLS1_TXT_DH_RSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_DH_RSA_WITH_AES_256_GCM_SHA384, + SSL_kDHr, + SSL_aDH, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher A2 */ + { + 1, + TLS1_TXT_DHE_DSS_WITH_AES_128_GCM_SHA256, + TLS1_CK_DHE_DSS_WITH_AES_128_GCM_SHA256, + SSL_kEDH, + SSL_aDSS, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher A3 */ + { + 1, + TLS1_TXT_DHE_DSS_WITH_AES_256_GCM_SHA384, + TLS1_CK_DHE_DSS_WITH_AES_256_GCM_SHA384, + SSL_kEDH, + SSL_aDSS, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher A4 */ + { + 0, + TLS1_TXT_DH_DSS_WITH_AES_128_GCM_SHA256, + TLS1_CK_DH_DSS_WITH_AES_128_GCM_SHA256, + SSL_kDHd, + SSL_aDH, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher A5 */ + { + 0, + TLS1_TXT_DH_DSS_WITH_AES_256_GCM_SHA384, + TLS1_CK_DH_DSS_WITH_AES_256_GCM_SHA384, + SSL_kDHd, + SSL_aDH, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher A6 */ + { + 1, + TLS1_TXT_ADH_WITH_AES_128_GCM_SHA256, + TLS1_CK_ADH_WITH_AES_128_GCM_SHA256, + SSL_kEDH, + SSL_aNULL, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher A7 */ + { + 1, + TLS1_TXT_ADH_WITH_AES_256_GCM_SHA384, + TLS1_CK_ADH_WITH_AES_256_GCM_SHA384, + SSL_kEDH, + SSL_aNULL, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + +#ifndef OPENSSL_NO_ECDH + /* Cipher C001 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_NULL_SHA, + TLS1_CK_ECDH_ECDSA_WITH_NULL_SHA, + SSL_kECDHe, + SSL_aECDH, + SSL_eNULL, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 0, + 0, + }, + + /* Cipher C002 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_RC4_128_SHA, + TLS1_CK_ECDH_ECDSA_WITH_RC4_128_SHA, + SSL_kECDHe, + SSL_aECDH, + SSL_RC4, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_MEDIUM, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C003 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_DES_192_CBC3_SHA, + TLS1_CK_ECDH_ECDSA_WITH_DES_192_CBC3_SHA, + SSL_kECDHe, + SSL_aECDH, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C004 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_AES_128_CBC_SHA, + TLS1_CK_ECDH_ECDSA_WITH_AES_128_CBC_SHA, + SSL_kECDHe, + SSL_aECDH, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C005 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_AES_256_CBC_SHA, + TLS1_CK_ECDH_ECDSA_WITH_AES_256_CBC_SHA, + SSL_kECDHe, + SSL_aECDH, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher C006 */ + { + 1, + TLS1_TXT_ECDHE_ECDSA_WITH_NULL_SHA, + TLS1_CK_ECDHE_ECDSA_WITH_NULL_SHA, + SSL_kEECDH, + SSL_aECDSA, + SSL_eNULL, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 0, + 0, + }, + + /* Cipher C007 */ + { + 1, + TLS1_TXT_ECDHE_ECDSA_WITH_RC4_128_SHA, + TLS1_CK_ECDHE_ECDSA_WITH_RC4_128_SHA, + SSL_kEECDH, + SSL_aECDSA, + SSL_RC4, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_MEDIUM, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C008 */ + { + 1, + TLS1_TXT_ECDHE_ECDSA_WITH_DES_192_CBC3_SHA, + TLS1_CK_ECDHE_ECDSA_WITH_DES_192_CBC3_SHA, SSL_kEECDH, SSL_aECDSA, SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 168, 168, @@ -1749,7 +2156,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_AES128, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 128, 128, @@ -1765,7 +2172,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_AES256, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 256, 256, @@ -1781,7 +2188,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ SSL_eNULL, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP|SSL_STRONG_NONE, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, 0, 0, @@ -1803,214 +2210,660 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ 128, }, - /* Cipher C00D */ + /* Cipher C00D */ + { + 1, + TLS1_TXT_ECDH_RSA_WITH_DES_192_CBC3_SHA, + TLS1_CK_ECDH_RSA_WITH_DES_192_CBC3_SHA, + SSL_kECDHr, + SSL_aECDH, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C00E */ + { + 1, + TLS1_TXT_ECDH_RSA_WITH_AES_128_CBC_SHA, + TLS1_CK_ECDH_RSA_WITH_AES_128_CBC_SHA, + SSL_kECDHr, + SSL_aECDH, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C00F */ + { + 1, + TLS1_TXT_ECDH_RSA_WITH_AES_256_CBC_SHA, + TLS1_CK_ECDH_RSA_WITH_AES_256_CBC_SHA, + SSL_kECDHr, + SSL_aECDH, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher C010 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_NULL_SHA, + TLS1_CK_ECDHE_RSA_WITH_NULL_SHA, + SSL_kEECDH, + SSL_aRSA, + SSL_eNULL, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 0, + 0, + }, + + /* Cipher C011 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_RC4_128_SHA, + TLS1_CK_ECDHE_RSA_WITH_RC4_128_SHA, + SSL_kEECDH, + SSL_aRSA, + SSL_RC4, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_MEDIUM, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C012 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_DES_192_CBC3_SHA, + TLS1_CK_ECDHE_RSA_WITH_DES_192_CBC3_SHA, + SSL_kEECDH, + SSL_aRSA, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C013 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_AES_128_CBC_SHA, + TLS1_CK_ECDHE_RSA_WITH_AES_128_CBC_SHA, + SSL_kEECDH, + SSL_aRSA, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C014 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_AES_256_CBC_SHA, + TLS1_CK_ECDHE_RSA_WITH_AES_256_CBC_SHA, + SSL_kEECDH, + SSL_aRSA, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher C015 */ + { + 1, + TLS1_TXT_ECDH_anon_WITH_NULL_SHA, + TLS1_CK_ECDH_anon_WITH_NULL_SHA, + SSL_kEECDH, + SSL_aNULL, + SSL_eNULL, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_STRONG_NONE|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 0, + 0, + }, + + /* Cipher C016 */ + { + 1, + TLS1_TXT_ECDH_anon_WITH_RC4_128_SHA, + TLS1_CK_ECDH_anon_WITH_RC4_128_SHA, + SSL_kEECDH, + SSL_aNULL, + SSL_RC4, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_MEDIUM, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C017 */ + { + 1, + TLS1_TXT_ECDH_anon_WITH_DES_192_CBC3_SHA, + TLS1_CK_ECDH_anon_WITH_DES_192_CBC3_SHA, + SSL_kEECDH, + SSL_aNULL, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C018 */ + { + 1, + TLS1_TXT_ECDH_anon_WITH_AES_128_CBC_SHA, + TLS1_CK_ECDH_anon_WITH_AES_128_CBC_SHA, + SSL_kEECDH, + SSL_aNULL, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C019 */ + { + 1, + TLS1_TXT_ECDH_anon_WITH_AES_256_CBC_SHA, + TLS1_CK_ECDH_anon_WITH_AES_256_CBC_SHA, + SSL_kEECDH, + SSL_aNULL, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, +#endif /* OPENSSL_NO_ECDH */ + +#ifndef OPENSSL_NO_SRP + /* Cipher C01A */ + { + 1, + TLS1_TXT_SRP_SHA_WITH_3DES_EDE_CBC_SHA, + TLS1_CK_SRP_SHA_WITH_3DES_EDE_CBC_SHA, + SSL_kSRP, + SSL_aNULL, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C01B */ + { + 1, + TLS1_TXT_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA, + TLS1_CK_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA, + SSL_kSRP, + SSL_aRSA, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C01C */ + { + 1, + TLS1_TXT_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA, + TLS1_CK_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA, + SSL_kSRP, + SSL_aDSS, + SSL_3DES, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 168, + 168, + }, + + /* Cipher C01D */ + { + 1, + TLS1_TXT_SRP_SHA_WITH_AES_128_CBC_SHA, + TLS1_CK_SRP_SHA_WITH_AES_128_CBC_SHA, + SSL_kSRP, + SSL_aNULL, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C01E */ + { + 1, + TLS1_TXT_SRP_SHA_RSA_WITH_AES_128_CBC_SHA, + TLS1_CK_SRP_SHA_RSA_WITH_AES_128_CBC_SHA, + SSL_kSRP, + SSL_aRSA, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C01F */ + { + 1, + TLS1_TXT_SRP_SHA_DSS_WITH_AES_128_CBC_SHA, + TLS1_CK_SRP_SHA_DSS_WITH_AES_128_CBC_SHA, + SSL_kSRP, + SSL_aDSS, + SSL_AES128, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 128, + 128, + }, + + /* Cipher C020 */ + { + 1, + TLS1_TXT_SRP_SHA_WITH_AES_256_CBC_SHA, + TLS1_CK_SRP_SHA_WITH_AES_256_CBC_SHA, + SSL_kSRP, + SSL_aNULL, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher C021 */ + { + 1, + TLS1_TXT_SRP_SHA_RSA_WITH_AES_256_CBC_SHA, + TLS1_CK_SRP_SHA_RSA_WITH_AES_256_CBC_SHA, + SSL_kSRP, + SSL_aRSA, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, + + /* Cipher C022 */ + { + 1, + TLS1_TXT_SRP_SHA_DSS_WITH_AES_256_CBC_SHA, + TLS1_CK_SRP_SHA_DSS_WITH_AES_256_CBC_SHA, + SSL_kSRP, + SSL_aDSS, + SSL_AES256, + SSL_SHA1, + SSL_TLSV1, + SSL_NOT_EXP|SSL_HIGH, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + 256, + 256, + }, +#endif /* OPENSSL_NO_SRP */ +#ifndef OPENSSL_NO_ECDH + + /* HMAC based TLS v1.2 ciphersuites from RFC5289 */ + + /* Cipher C023 */ + { + 1, + TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_SHA256, + TLS1_CK_ECDHE_ECDSA_WITH_AES_128_SHA256, + SSL_kEECDH, + SSL_aECDSA, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher C024 */ + { + 1, + TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_SHA384, + TLS1_CK_ECDHE_ECDSA_WITH_AES_256_SHA384, + SSL_kEECDH, + SSL_aECDSA, + SSL_AES256, + SSL_SHA384, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher C025 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_AES_128_SHA256, + TLS1_CK_ECDH_ECDSA_WITH_AES_128_SHA256, + SSL_kECDHe, + SSL_aECDH, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher C026 */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_AES_256_SHA384, + TLS1_CK_ECDH_ECDSA_WITH_AES_256_SHA384, + SSL_kECDHe, + SSL_aECDH, + SSL_AES256, + SSL_SHA384, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher C027 */ + { + 1, + TLS1_TXT_ECDHE_RSA_WITH_AES_128_SHA256, + TLS1_CK_ECDHE_RSA_WITH_AES_128_SHA256, + SSL_kEECDH, + SSL_aRSA, + SSL_AES128, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, + }, + + /* Cipher C028 */ { 1, - TLS1_TXT_ECDH_RSA_WITH_DES_192_CBC3_SHA, - TLS1_CK_ECDH_RSA_WITH_DES_192_CBC3_SHA, - SSL_kECDHr, - SSL_aECDH, - SSL_3DES, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 168, - 168, + TLS1_TXT_ECDHE_RSA_WITH_AES_256_SHA384, + TLS1_CK_ECDHE_RSA_WITH_AES_256_SHA384, + SSL_kEECDH, + SSL_aRSA, + SSL_AES256, + SSL_SHA384, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, }, - /* Cipher C00E */ + /* Cipher C029 */ { 1, - TLS1_TXT_ECDH_RSA_WITH_AES_128_CBC_SHA, - TLS1_CK_ECDH_RSA_WITH_AES_128_CBC_SHA, + TLS1_TXT_ECDH_RSA_WITH_AES_128_SHA256, + TLS1_CK_ECDH_RSA_WITH_AES_128_SHA256, SSL_kECDHr, SSL_aECDH, SSL_AES128, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_SHA256, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C00F */ + /* Cipher C02A */ { 1, - TLS1_TXT_ECDH_RSA_WITH_AES_256_CBC_SHA, - TLS1_CK_ECDH_RSA_WITH_AES_256_CBC_SHA, + TLS1_TXT_ECDH_RSA_WITH_AES_256_SHA384, + TLS1_CK_ECDH_RSA_WITH_AES_256_SHA384, SSL_kECDHr, SSL_aECDH, SSL_AES256, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_SHA384, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, 256, 256, }, - /* Cipher C010 */ + /* GCM based TLS v1.2 ciphersuites from RFC5289 */ + + /* Cipher C02B */ { 1, - TLS1_TXT_ECDHE_RSA_WITH_NULL_SHA, - TLS1_CK_ECDHE_RSA_WITH_NULL_SHA, + TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, SSL_kEECDH, - SSL_aRSA, - SSL_eNULL, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_STRONG_NONE, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 0, - 0, + SSL_aECDSA, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, + 128, + 128, }, - /* Cipher C011 */ + /* Cipher C02C */ { 1, - TLS1_TXT_ECDHE_RSA_WITH_RC4_128_SHA, - TLS1_CK_ECDHE_RSA_WITH_RC4_128_SHA, + TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, SSL_kEECDH, - SSL_aRSA, - SSL_RC4, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_MEDIUM, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_aECDSA, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, + }, + + /* Cipher C02D */ + { + 1, + TLS1_TXT_ECDH_ECDSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_ECDH_ECDSA_WITH_AES_128_GCM_SHA256, + SSL_kECDHe, + SSL_aECDH, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C012 */ + /* Cipher C02E */ { 1, - TLS1_TXT_ECDHE_RSA_WITH_DES_192_CBC3_SHA, - TLS1_CK_ECDHE_RSA_WITH_DES_192_CBC3_SHA, - SSL_kEECDH, - SSL_aRSA, - SSL_3DES, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 168, - 168, + TLS1_TXT_ECDH_ECDSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_ECDH_ECDSA_WITH_AES_256_GCM_SHA384, + SSL_kECDHe, + SSL_aECDH, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, }, - /* Cipher C013 */ + /* Cipher C02F */ { 1, - TLS1_TXT_ECDHE_RSA_WITH_AES_128_CBC_SHA, - TLS1_CK_ECDHE_RSA_WITH_AES_128_CBC_SHA, + TLS1_TXT_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_ECDHE_RSA_WITH_AES_128_GCM_SHA256, SSL_kEECDH, SSL_aRSA, - SSL_AES128, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C014 */ + /* Cipher C030 */ { 1, - TLS1_TXT_ECDHE_RSA_WITH_AES_256_CBC_SHA, - TLS1_CK_ECDHE_RSA_WITH_AES_256_CBC_SHA, + TLS1_TXT_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_ECDHE_RSA_WITH_AES_256_GCM_SHA384, SSL_kEECDH, SSL_aRSA, - SSL_AES256, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, 256, 256, }, - /* Cipher C015 */ - { - 1, - TLS1_TXT_ECDH_anon_WITH_NULL_SHA, - TLS1_CK_ECDH_anon_WITH_NULL_SHA, - SSL_kEECDH, - SSL_aNULL, - SSL_eNULL, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_STRONG_NONE, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 0, - 0, - }, - - /* Cipher C016 */ + /* Cipher C031 */ { 1, - TLS1_TXT_ECDH_anon_WITH_RC4_128_SHA, - TLS1_CK_ECDH_anon_WITH_RC4_128_SHA, - SSL_kEECDH, - SSL_aNULL, - SSL_RC4, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_MEDIUM, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256, + TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256, + SSL_kECDHr, + SSL_aECDH, + SSL_AES128GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C017 */ + /* Cipher C032 */ { 1, - TLS1_TXT_ECDH_anon_WITH_DES_192_CBC3_SHA, - TLS1_CK_ECDH_anon_WITH_DES_192_CBC3_SHA, - SSL_kEECDH, - SSL_aNULL, - SSL_3DES, - SSL_SHA1, - SSL_TLSV1, - SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, - 168, - 168, + TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384, + TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384, + SSL_kECDHr, + SSL_aECDH, + SSL_AES256GCM, + SSL_AEAD, + SSL_TLSV1_2, + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, + SSL_HANDSHAKE_MAC_SHA384|TLS1_PRF_SHA384, + 256, + 256, }, - /* Cipher C018 */ +#ifndef OPENSSL_NO_PSK + /* ECDH PSK ciphersuites from RFC 5489 */ + + /* Cipher C037 */ { 1, - TLS1_TXT_ECDH_anon_WITH_AES_128_CBC_SHA, - TLS1_CK_ECDH_anon_WITH_AES_128_CBC_SHA, + TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256, + TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256, SSL_kEECDH, - SSL_aNULL, + SSL_aPSK, SSL_AES128, - SSL_SHA1, + SSL_SHA256, SSL_TLSV1, SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256, 128, 128, }, - /* Cipher C019 */ + /* Cipher C038 */ { 1, - TLS1_TXT_ECDH_anon_WITH_AES_256_CBC_SHA, - TLS1_CK_ECDH_anon_WITH_AES_256_CBC_SHA, + TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384, + TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384, SSL_kEECDH, - SSL_aNULL, + SSL_aPSK, SSL_AES256, - SSL_SHA1, + SSL_SHA384, SSL_TLSV1, SSL_NOT_EXP|SSL_HIGH, - SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF, + SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384, 256, 256, }, -#endif /* OPENSSL_NO_ECDH */ +#endif /* OPENSSL_NO_PSK */ + +#endif /* OPENSSL_NO_ECDH */ + #ifdef TEMP_GOST_TLS /* Cipher FF00 */ @@ -2087,6 +2940,9 @@ SSL3_ENC_METHOD SSLv3_enc_data={ SSL3_MD_CLIENT_FINISHED_CONST,4, SSL3_MD_SERVER_FINISHED_CONST,4, ssl3_alert_code, + (int (*)(SSL *, unsigned char *, size_t, const char *, + size_t, const unsigned char *, size_t, + int use_context))ssl_undefined_function, }; long ssl3_default_timeout(void) @@ -2128,6 +2984,14 @@ int ssl3_new(SSL *s) s->s3=s3; +#ifndef OPENSSL_NO_SRP + SSL_SRP_CTX_init(s); +#endif +#if !defined(OPENSSL_NO_TLSEXT) + s->tlsext_channel_id_enabled = s->ctx->tlsext_channel_id_enabled; + if (s->ctx->tlsext_channel_id_private) + s->tlsext_channel_id_private = EVP_PKEY_dup(s->ctx->tlsext_channel_id_private); +#endif s->method->ssl_clear(s); return(1); err: @@ -2168,6 +3032,14 @@ void ssl3_free(SSL *s) BIO_free(s->s3->handshake_buffer); } if (s->s3->handshake_dgst) ssl3_free_digest_list(s); +#ifndef OPENSSL_NO_TLSEXT + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); +#endif + +#ifndef OPENSSL_NO_SRP + SSL_SRP_CTX_free(s); +#endif OPENSSL_cleanse(s->s3,sizeof *s->s3); OPENSSL_free(s->s3); s->s3=NULL; @@ -2177,6 +3049,7 @@ void ssl3_clear(SSL *s) { unsigned char *rp,*wp; size_t rlen, wlen; + int init_extra; #ifdef TLSEXT_TYPE_opaque_prf_input if (s->s3->client_opaque_prf_input != NULL) @@ -2210,11 +3083,17 @@ void ssl3_clear(SSL *s) s->s3->tmp.ecdh = NULL; } #endif +#ifndef OPENSSL_NO_TLSEXT +#ifndef OPENSSL_NO_EC + s->s3->is_probably_safari = 0; +#endif /* !OPENSSL_NO_EC */ +#endif /* !OPENSSL_NO_TLSEXT */ rp = s->s3->rbuf.buf; wp = s->s3->wbuf.buf; rlen = s->s3->rbuf.len; wlen = s->s3->wbuf.len; + init_extra = s->s3->init_extra; if (s->s3->handshake_buffer) { BIO_free(s->s3->handshake_buffer); s->s3->handshake_buffer = NULL; @@ -2222,11 +3101,20 @@ void ssl3_clear(SSL *s) if (s->s3->handshake_dgst) { ssl3_free_digest_list(s); } + +#if !defined(OPENSSL_NO_TLSEXT) + if (s->s3->alpn_selected) + { + free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } +#endif memset(s->s3,0,sizeof *s->s3); s->s3->rbuf.buf = rp; s->s3->wbuf.buf = wp; s->s3->rbuf.len = rlen; s->s3->wbuf.len = wlen; + s->s3->init_extra = init_extra; ssl_free_wbio_buffer(s); @@ -2245,7 +3133,18 @@ void ssl3_clear(SSL *s) s->next_proto_negotiated_len = 0; } #endif + +#if !defined(OPENSSL_NO_TLSEXT) + s->s3->tlsext_channel_id_valid = 0; +#endif + } + +#ifndef OPENSSL_NO_SRP +static char * MS_CALLBACK srp_password_from_info_cb(SSL *s, void *arg) + { + return BUF_strdup(s->srp_ctx.info) ; } +#endif long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) { @@ -2492,6 +3391,56 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) ret = 1; break; +#ifndef OPENSSL_NO_HEARTBEATS + case SSL_CTRL_TLS_EXT_SEND_HEARTBEAT: + if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) + ret = dtls1_heartbeat(s); + else + ret = tls1_heartbeat(s); + break; + + case SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING: + ret = s->tlsext_hb_pending; + break; + + case SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS: + if (larg) + s->tlsext_heartbeat |= SSL_TLSEXT_HB_DONT_RECV_REQUESTS; + else + s->tlsext_heartbeat &= ~SSL_TLSEXT_HB_DONT_RECV_REQUESTS; + ret = 1; + break; +#endif + case SSL_CTRL_CHANNEL_ID: + if (!s->server) + break; + s->tlsext_channel_id_enabled = 1; + ret = 1; + break; + + case SSL_CTRL_SET_CHANNEL_ID: + if (s->server) + break; + s->tlsext_channel_id_enabled = 1; + if (EVP_PKEY_bits(parg) != 256) + { + SSLerr(SSL_F_SSL3_CTRL,SSL_R_CHANNEL_ID_NOT_P256); + break; + } + if (s->tlsext_channel_id_private) + EVP_PKEY_free(s->tlsext_channel_id_private); + s->tlsext_channel_id_private = (EVP_PKEY*) parg; + ret = 1; + break; + + case SSL_CTRL_GET_CHANNEL_ID: + if (!s->server) + break; + if (!s->s3->tlsext_channel_id_valid) + break; + memcpy(parg, s->s3->tlsext_channel_id, larg < 64 ? larg : 64); + return 64; + #endif /* !OPENSSL_NO_TLSEXT */ default: break; @@ -2712,6 +3661,12 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) } return 1; } + case SSL_CTRL_CHANNEL_ID: + /* must be called on a server */ + if (ctx->method->ssl_accept == ssl_undefined_function) + return 0; + ctx->tlsext_channel_id_enabled=1; + return 1; #ifdef TLSEXT_TYPE_opaque_prf_input case SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG: @@ -2724,6 +3679,38 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) return 1; break; +#ifndef OPENSSL_NO_SRP + case SSL_CTRL_SET_TLS_EXT_SRP_USERNAME: + ctx->srp_ctx.srp_Mask|=SSL_kSRP; + if (ctx->srp_ctx.login != NULL) + OPENSSL_free(ctx->srp_ctx.login); + ctx->srp_ctx.login = NULL; + if (parg == NULL) + break; + if (strlen((const char *)parg) > 255 || strlen((const char *)parg) < 1) + { + SSLerr(SSL_F_SSL3_CTX_CTRL, SSL_R_INVALID_SRP_USERNAME); + return 0; + } + if ((ctx->srp_ctx.login = BUF_strdup((char *)parg)) == NULL) + { + SSLerr(SSL_F_SSL3_CTX_CTRL, ERR_R_INTERNAL_ERROR); + return 0; + } + break; + case SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD: + ctx->srp_ctx.SRP_give_srp_client_pwd_callback=srp_password_from_info_cb; + ctx->srp_ctx.info=parg; + break; + case SSL_CTRL_SET_SRP_ARG: + ctx->srp_ctx.srp_Mask|=SSL_kSRP; + ctx->srp_ctx.SRP_cb_arg=parg; + break; + + case SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH: + ctx->srp_ctx.strength=larg; + break; +#endif #endif /* !OPENSSL_NO_TLSEXT */ /* A Thawte special :-) */ @@ -2736,6 +3723,30 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) sk_X509_push(ctx->extra_certs,(X509 *)parg); break; + case SSL_CTRL_GET_EXTRA_CHAIN_CERTS: + *(STACK_OF(X509) **)parg = ctx->extra_certs; + break; + + case SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS: + if (ctx->extra_certs) + { + sk_X509_pop_free(ctx->extra_certs, X509_free); + ctx->extra_certs = NULL; + } + break; + + case SSL_CTRL_SET_CHANNEL_ID: + ctx->tlsext_channel_id_enabled = 1; + if (EVP_PKEY_bits(parg) != 256) + { + SSLerr(SSL_F_SSL3_CTX_CTRL,SSL_R_CHANNEL_ID_NOT_P256); + break; + } + if (ctx->tlsext_channel_id_private) + EVP_PKEY_free(ctx->tlsext_channel_id_private); + ctx->tlsext_channel_id_private = (EVP_PKEY*) parg; + break; + default: return(0); } @@ -2793,6 +3804,20 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx, int cmd, void (*fp)(void)) HMAC_CTX *, int))fp; break; +#ifndef OPENSSL_NO_SRP + case SSL_CTRL_SET_SRP_VERIFY_PARAM_CB: + ctx->srp_ctx.srp_Mask|=SSL_kSRP; + ctx->srp_ctx.SRP_verify_param_callback=(int (*)(SSL *,void *))fp; + break; + case SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB: + ctx->srp_ctx.srp_Mask|=SSL_kSRP; + ctx->srp_ctx.TLS_ext_srp_username_callback=(int (*)(SSL *,int *,void *))fp; + break; + case SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB: + ctx->srp_ctx.srp_Mask|=SSL_kSRP; + ctx->srp_ctx.SRP_give_srp_client_pwd_callback=(char *(*)(SSL *,void *))fp; + break; +#endif #endif default: return(0); @@ -2811,6 +3836,9 @@ const SSL_CIPHER *ssl3_get_cipher_by_char(const unsigned char *p) id=0x03000000L|((unsigned long)p[0]<<8L)|(unsigned long)p[1]; c.id=id; cp = OBJ_bsearch_ssl_cipher_id(&c, ssl3_ciphers, SSL3_NUM_CIPHERS); +#ifdef DEBUG_PRINT_UNKNOWN_CIPHERSUITES +if (cp == NULL) fprintf(stderr, "Unknown cipher ID %x\n", (p[0] << 8) | p[1]); +#endif if (cp == NULL || cp->valid == 0) return NULL; else @@ -2888,11 +3916,20 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, { c=sk_SSL_CIPHER_value(prio,i); + /* Skip TLS v1.2 only ciphersuites if lower than v1.2 */ + if ((c->algorithm_ssl & SSL_TLSV1_2) && + (TLS1_get_version(s) < TLS1_2_VERSION)) + continue; + ssl_set_cert_masks(cert,c); mask_k = cert->mask_k; mask_a = cert->mask_a; emask_k = cert->export_mask_k; emask_a = cert->export_mask_a; +#ifndef OPENSSL_NO_SRP + mask_k=cert->mask_k | s->srp_ctx.srp_Mask; + emask_k=cert->export_mask_k | s->srp_ctx.srp_Mask; +#endif #ifdef KSSL_DEBUG /* printf("ssl3_choose_cipher %d alg= %lx\n", i,c->algorithms);*/ @@ -2910,7 +3947,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, #endif /* OPENSSL_NO_KRB5 */ #ifndef OPENSSL_NO_PSK /* with PSK there must be server callback set */ - if ((alg_k & SSL_kPSK) && s->psk_server_callback == NULL) + if ((alg_a & SSL_aPSK) && s->psk_server_callback == NULL) continue; #endif /* OPENSSL_NO_PSK */ @@ -3089,6 +4126,13 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, ii=sk_SSL_CIPHER_find(allow,c); if (ii >= 0) { +#if !defined(OPENSSL_NO_EC) && !defined(OPENSSL_NO_TLSEXT) + if ((alg_k & SSL_kEECDH) && (alg_a & SSL_aECDSA) && s->s3->is_probably_safari) + { + if (!ret) ret=sk_SSL_CIPHER_value(allow,ii); + continue; + } +#endif ret=sk_SSL_CIPHER_value(allow,ii); break; } @@ -3354,4 +4398,15 @@ need to go to SSL_ST_ACCEPT. } return(ret); } - +/* If we are using TLS v1.2 or later and default SHA1+MD5 algorithms switch + * to new SHA256 PRF and handshake macs + */ +long ssl_get_algorithm2(SSL *s) + { + long alg2 = s->s3->tmp.new_cipher->algorithm2; + if (s->method->version == TLS1_2_VERSION && + alg2 == (SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF)) + return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256; + return alg2; + } + diff --git a/app/openssl/ssl/s3_pkt.c b/app/openssl/ssl/s3_pkt.c index 0d3874ae..75997ac2 100644 --- a/app/openssl/ssl/s3_pkt.c +++ b/app/openssl/ssl/s3_pkt.c @@ -115,9 +115,10 @@ #include "ssl_locl.h" #include #include +#include static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, - unsigned int len, int create_empty_fragment); + unsigned int len, char fragment, char is_fragment); static int ssl3_get_record(SSL *s); int ssl3_read_n(SSL *s, int n, int max, int extend) @@ -289,16 +290,8 @@ static int ssl3_get_record(SSL *s) unsigned char *p; unsigned char md[EVP_MAX_MD_SIZE]; short version; - int mac_size; - int clear=0; + unsigned mac_size, orig_len; size_t extra; - int decryption_failed_or_bad_record_mac = 0; - unsigned char *mac = NULL; -#if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0 - long align=SSL3_ALIGN_PAYLOAD; -#else - long align=0; -#endif rr= &(s->s3->rrec); sess=s->session; @@ -307,8 +300,7 @@ static int ssl3_get_record(SSL *s) extra=SSL3_RT_MAX_EXTRA; else extra=0; - if (!(SSL_get_mode(s) & SSL_MODE_SMALL_BUFFERS) && - extra && !s->s3->init_extra) + if (extra && !s->s3->init_extra) { /* An application error: SLS_OP_MICROSOFT_BIG_SSLV3_BUFFER * set after ssl3_setup_buffers() was done */ @@ -343,7 +335,7 @@ fprintf(stderr, "Record type=%d, Length=%d\n", rr->type, rr->length); if (version != s->version) { SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_WRONG_VERSION_NUMBER); - if ((s->version & 0xFF00) == (version & 0xFF00)) + if ((s->version & 0xFF00) == (version & 0xFF00) && !s->enc_write_ctx && !s->write_hash) /* Send back error using their minor version number :-) */ s->version = (unsigned short)version; al=SSL_AD_PROTOCOL_VERSION; @@ -357,21 +349,6 @@ fprintf(stderr, "Record type=%d, Length=%d\n", rr->type, rr->length); goto err; } - /* If we receive a valid record larger than the current buffer size, - * allocate some memory for it. - */ - if (rr->length > s->s3->rbuf.len - SSL3_RT_HEADER_LENGTH - align) - { - if ((p=OPENSSL_realloc(s->s3->rbuf.buf, rr->length + SSL3_RT_HEADER_LENGTH + align))==NULL) - { - SSLerr(SSL_F_SSL3_GET_RECORD,ERR_R_MALLOC_FAILURE); - goto err; - } - s->s3->rbuf.buf=p; - s->s3->rbuf.len=rr->length + SSL3_RT_HEADER_LENGTH + align; - s->packet= &(s->s3->rbuf.buf[0]); - } - if (rr->length > s->s3->rbuf.len - SSL3_RT_HEADER_LENGTH) { al=SSL_AD_RECORD_OVERFLOW; @@ -423,17 +400,15 @@ fprintf(stderr, "Record type=%d, Length=%d\n", rr->type, rr->length); rr->data=rr->input; enc_err = s->method->ssl3_enc->enc(s,0); - if (enc_err <= 0) + /* enc_err is: + * 0: (in non-constant time) if the record is publically invalid. + * 1: if the padding is valid + * -1: if the padding is invalid */ + if (enc_err == 0) { - if (enc_err == 0) - /* SSLerr() and ssl3_send_alert() have been called */ - goto err; - - /* Otherwise enc_err == -1, which indicates bad padding - * (rec->length has not been changed in this case). - * To minimize information leaked via timing, we will perform - * the MAC computation anyway. */ - decryption_failed_or_bad_record_mac = 1; + al=SSL_AD_DECRYPTION_FAILED; + SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_BLOCK_CIPHER_PAD_IS_WRONG); + goto f_err; } #ifdef TLS_DEBUG @@ -443,53 +418,62 @@ printf("\n"); #endif /* r->length is now the compressed data plus mac */ - if ( (sess == NULL) || - (s->enc_read_ctx == NULL) || - (EVP_MD_CTX_md(s->read_hash) == NULL)) - clear=1; - - if (!clear) + if ((sess != NULL) && + (s->enc_read_ctx != NULL) && + (EVP_MD_CTX_md(s->read_hash) != NULL)) { - /* !clear => s->read_hash != NULL => mac_size != -1 */ + /* s->read_hash != NULL => mac_size != -1 */ + unsigned char *mac = NULL; + unsigned char mac_tmp[EVP_MAX_MD_SIZE]; mac_size=EVP_MD_CTX_size(s->read_hash); - OPENSSL_assert(mac_size >= 0); + OPENSSL_assert(mac_size <= EVP_MAX_MD_SIZE); - if (rr->length > SSL3_RT_MAX_COMPRESSED_LENGTH+extra+mac_size) + /* kludge: *_cbc_remove_padding passes padding length in rr->type */ + orig_len = rr->length+((unsigned int)rr->type>>8); + + /* orig_len is the length of the record before any padding was + * removed. This is public information, as is the MAC in use, + * therefore we can safely process the record in a different + * amount of time if it's too short to possibly contain a MAC. + */ + if (orig_len < mac_size || + /* CBC records must have a padding length byte too. */ + (EVP_CIPHER_CTX_mode(s->enc_read_ctx) == EVP_CIPH_CBC_MODE && + orig_len < mac_size+1)) { -#if 0 /* OK only for stream ciphers (then rr->length is visible from ciphertext anyway) */ - al=SSL_AD_RECORD_OVERFLOW; - SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_PRE_MAC_LENGTH_TOO_LONG); + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_LENGTH_TOO_SHORT); goto f_err; -#else - decryption_failed_or_bad_record_mac = 1; -#endif } - /* check the MAC for rr->input (it's in mac_size bytes at the tail) */ - if (rr->length >= (unsigned int)mac_size) + + if (EVP_CIPHER_CTX_mode(s->enc_read_ctx) == EVP_CIPH_CBC_MODE) { + /* We update the length so that the TLS header bytes + * can be constructed correctly but we need to extract + * the MAC in constant time from within the record, + * without leaking the contents of the padding bytes. + * */ + mac = mac_tmp; + ssl3_cbc_copy_mac(mac_tmp, rr, mac_size, orig_len); rr->length -= mac_size; - mac = &rr->data[rr->length]; } else { - /* record (minus padding) is too short to contain a MAC */ -#if 0 /* OK only for stream ciphers */ - al=SSL_AD_DECODE_ERROR; - SSLerr(SSL_F_SSL3_GET_RECORD,SSL_R_LENGTH_TOO_SHORT); - goto f_err; -#else - decryption_failed_or_bad_record_mac = 1; - rr->length = 0; -#endif - } - i=s->method->ssl3_enc->mac(s,md,0); - if (i < 0 || mac == NULL || memcmp(md, mac, (size_t)mac_size) != 0) - { - decryption_failed_or_bad_record_mac = 1; + /* In this case there's no padding, so |orig_len| + * equals |rec->length| and we checked that there's + * enough bytes for |mac_size| above. */ + rr->length -= mac_size; + mac = &rr->data[rr->length]; } + + i=s->method->ssl3_enc->mac(s,md,0 /* not send */); + if (i < 0 || mac == NULL || CRYPTO_memcmp(md, mac, (size_t)mac_size) != 0) + enc_err = -1; + if (rr->length > SSL3_RT_MAX_COMPRESSED_LENGTH+extra+mac_size) + enc_err = -1; } - if (decryption_failed_or_bad_record_mac) + if (enc_err < 0) { /* A separate 'decryption_failed' alert was introduced with TLS 1.0, * SSL 3.0 only has 'bad_record_mac'. But unless a decryption @@ -598,7 +582,6 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) const unsigned char *buf=buf_; unsigned int tot,n,nw; int i; - unsigned int max_plain_length; s->rwstate=SSL_NOTHING; tot=s->s3->wnum; @@ -618,17 +601,34 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) n=(len-tot); for (;;) { - if (type == SSL3_RT_APPLICATION_DATA && (SSL_get_mode(s) & SSL_MODE_SMALL_BUFFERS)) - max_plain_length = SSL3_RT_DEFAULT_PLAIN_LENGTH; - else - max_plain_length = s->max_send_fragment; + /* max contains the maximum number of bytes that we can put + * into a record. */ + unsigned max = s->max_send_fragment; + /* fragment is true if do_ssl3_write should send the first byte + * in its own record in order to randomise a CBC IV. */ + int fragment = 0; + + if (n > 1 && + s->s3->need_record_splitting && + type == SSL3_RT_APPLICATION_DATA && + !s->s3->record_split_done) + { + fragment = 1; + /* The first byte will be in its own record, so we + * can write an extra byte. */ + max++; + /* record_split_done records that the splitting has + * been done in case we hit an SSL_WANT_WRITE condition. + * In that case, we don't need to do the split again. */ + s->s3->record_split_done = 1; + } - if (n > max_plain_length) - nw = max_plain_length; + if (n > max) + nw=max; else nw=n; - i=do_ssl3_write(s, type, &(buf[tot]), nw, 0); + i=do_ssl3_write(s, type, &(buf[tot]), nw, fragment, 0); if (i <= 0) { s->s3->wnum=tot; @@ -639,10 +639,10 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) (type == SSL3_RT_APPLICATION_DATA && (s->mode & SSL_MODE_ENABLE_PARTIAL_WRITE))) { - /* next chunk of data should get another prepended empty fragment - * in ciphersuites with known-IV weakness: */ - s->s3->empty_fragment_done = 0; - + /* next chunk of data should get another prepended, + * one-byte fragment in ciphersuites with known-IV + * weakness. */ + s->s3->record_split_done = 0; return tot+i; } @@ -651,12 +651,18 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) } } +/* do_ssl3_write writes an SSL record of the given type. If |fragment| is 1 + * then it splits the record into a one byte record and a record with the rest + * of the data in order to randomise a CBC IV. If |is_fragment| is true then + * this call resulted from do_ssl3_write calling itself in order to create that + * one byte fragment. */ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, - unsigned int len, int create_empty_fragment) + unsigned int len, char fragment, char is_fragment) { unsigned char *p,*plen; - int i,mac_size,clear=0; + int i,mac_size; int prefix_len=0; + int eivlen; long align=0; SSL3_RECORD *wr; SSL3_BUFFER *wb=&(s->s3->wbuf); @@ -680,7 +686,7 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, /* if it went, fall through and send more stuff */ } - if (len == 0 && !create_empty_fragment) + if (len == 0) return 0; wr= &(s->s3->wrec); @@ -689,10 +695,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, if ( (sess == NULL) || (s->enc_write_ctx == NULL) || (EVP_MD_CTX_md(s->write_hash) == NULL)) - clear=1; - - if (clear) + { mac_size=0; + } else { mac_size=EVP_MD_CTX_size(s->write_hash); @@ -700,54 +705,33 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, goto err; } - /* 'create_empty_fragment' is true only when this function calls itself */ - if (!clear && !create_empty_fragment && !s->s3->empty_fragment_done) + if (fragment) { /* countermeasure against known-IV weakness in CBC ciphersuites * (see http://www.openssl.org/~bodo/tls-cbc.txt) */ + prefix_len = do_ssl3_write(s, type, buf, 1 /* length */, + 0 /* fragment */, + 1 /* is_fragment */); + if (prefix_len <= 0) + goto err; - if (s->s3->need_empty_fragments && type == SSL3_RT_APPLICATION_DATA) - { - /* recursive function call with 'create_empty_fragment' set; - * this prepares and buffers the data for an empty fragment - * (these 'prefix_len' bytes are sent out later - * together with the actual payload) */ - prefix_len = do_ssl3_write(s, type, buf, 0, 1); - if (prefix_len <= 0) - goto err; - - if (prefix_len > - (SSL3_RT_HEADER_LENGTH + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD)) - { - /* insufficient space */ - SSLerr(SSL_F_DO_SSL3_WRITE, ERR_R_INTERNAL_ERROR); - goto err; - } - } - - s->s3->empty_fragment_done = 1; - } - - /* resize if necessary to hold the data. */ - if (len + SSL3_RT_DEFAULT_WRITE_OVERHEAD > wb->len) - { - if ((p=OPENSSL_realloc(wb->buf, len + SSL3_RT_DEFAULT_WRITE_OVERHEAD))==NULL) + if (prefix_len > (SSL3_RT_HEADER_LENGTH + + SSL3_RT_SEND_MAX_ENCRYPTED_OVERHEAD)) { - SSLerr(SSL_F_DO_SSL3_WRITE,ERR_R_MALLOC_FAILURE); + /* insufficient space */ + SSLerr(SSL_F_DO_SSL3_WRITE, ERR_R_INTERNAL_ERROR); goto err; } - wb->buf = p; - wb->len = len + SSL3_RT_DEFAULT_WRITE_OVERHEAD; } - if (create_empty_fragment) + if (is_fragment) { #if defined(SSL3_ALIGN_PAYLOAD) && SSL3_ALIGN_PAYLOAD!=0 - /* extra fragment would be couple of cipher blocks, - * which would be multiple of SSL3_ALIGN_PAYLOAD, so - * if we want to align the real payload, then we can - * just pretent we simply have two headers. */ - align = (long)wb->buf + 2*SSL3_RT_HEADER_LENGTH; + /* The extra fragment would be couple of cipher blocks, and + * that will be a multiple of SSL3_ALIGN_PAYLOAD. So, if we + * want to align the real payload, we can just pretend that we + * have two headers and a byte. */ + align = (long)wb->buf + 2*SSL3_RT_HEADER_LENGTH + 1; align = (-align)&(SSL3_ALIGN_PAYLOAD-1); #endif p = wb->buf + align; @@ -773,16 +757,42 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, wr->type=type; *(p++)=(s->version>>8); - *(p++)=s->version&0xff; + /* Some servers hang if iniatial client hello is larger than 256 + * bytes and record version number > TLS 1.0 + */ + if (s->state == SSL3_ST_CW_CLNT_HELLO_B + && !s->renegotiate + && TLS1_get_version(s) > TLS1_VERSION) + *(p++) = 0x1; + else + *(p++)=s->version&0xff; /* field where we are to write out packet length */ - plen=p; + plen=p; p+=2; + /* Explicit IV length, block ciphers and TLS version 1.1 or later */ + if (s->enc_write_ctx && s->version >= TLS1_1_VERSION) + { + int mode = EVP_CIPHER_CTX_mode(s->enc_write_ctx); + if (mode == EVP_CIPH_CBC_MODE) + { + eivlen = EVP_CIPHER_CTX_iv_length(s->enc_write_ctx); + if (eivlen <= 1) + eivlen = 0; + } + /* Need explicit part of IV for GCM mode */ + else if (mode == EVP_CIPH_GCM_MODE) + eivlen = EVP_GCM_TLS_EXPLICIT_IV_LEN; + else + eivlen = 0; + } + else + eivlen = 0; /* lets setup the record stuff. */ - wr->data=p; - wr->length=(int)len; - wr->input=(unsigned char *)buf; + wr->data=p + eivlen; + wr->length=(int)(len - (fragment != 0)); + wr->input=(unsigned char *)buf + (fragment != 0); /* we now 'read' from wr->input, wr->length bytes into * wr->data */ @@ -808,11 +818,19 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, if (mac_size != 0) { - if (s->method->ssl3_enc->mac(s,&(p[wr->length]),1) < 0) + if (s->method->ssl3_enc->mac(s,&(p[wr->length + eivlen]),1) < 0) goto err; wr->length+=mac_size; - wr->input=p; - wr->data=p; + } + + wr->input=p; + wr->data=p; + + if (eivlen) + { + /* if (RAND_pseudo_bytes(p, eivlen) <= 0) + goto err; */ + wr->length += eivlen; } /* ssl3_enc can only have an error on read */ @@ -827,11 +845,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, wr->type=type; /* not needed but helps for debugging */ wr->length+=SSL3_RT_HEADER_LENGTH; - if (create_empty_fragment) + if (is_fragment) { - /* we are in a recursive call; - * just return the length, don't write out anything here - */ + /* we are in a recursive call; just return the length, don't + * write out anything. */ return wr->length; } @@ -1081,6 +1098,19 @@ start: dest = s->s3->alert_fragment; dest_len = &s->s3->alert_fragment_len; } +#ifndef OPENSSL_NO_HEARTBEATS + else if (rr->type == TLS1_RT_HEARTBEAT) + { + tls1_process_heartbeat(s); + + /* Exit and notify application to read again */ + rr->length = 0; + s->rwstate=SSL_READING; + BIO_clear_retry_flags(SSL_get_rbio(s)); + BIO_set_retry_read(SSL_get_rbio(s)); + return(-1); + } +#endif if (dest_maxlen > 0) { @@ -1224,6 +1254,10 @@ start: SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_NO_RENEGOTIATION); goto f_err; } +#ifdef SSL_AD_MISSING_SRP_USERNAME + else if (alert_descr == SSL_AD_MISSING_SRP_USERNAME) + return(0); +#endif } else if (alert_level == 2) /* fatal */ { @@ -1275,6 +1309,13 @@ start: goto f_err; } + if (!(s->s3->flags & SSL3_FLAGS_CCS_OK)) + { + al=SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS); + goto f_err; + } + rr->length=0; if (s->msg_callback) @@ -1302,6 +1343,7 @@ start: #else s->state = s->server ? SSL_ST_ACCEPT : SSL_ST_CONNECT; #endif + s->renegotiate=1; s->new_session=1; } i=s->handshake_func(s); @@ -1335,8 +1377,10 @@ start: { default: #ifndef OPENSSL_NO_TLS - /* TLS just ignores unknown message types */ - if (s->version == TLS1_VERSION) + /* TLS up to v1.1 just ignores unknown message types: + * TLS v1.2 give an unexpected message alert. + */ + if (s->version >= TLS1_VERSION && s->version <= TLS1_1_VERSION) { rr->length = 0; goto start; @@ -1396,10 +1440,8 @@ err: int ssl3_do_change_cipher_spec(SSL *s) { int i; -#ifdef OPENSSL_NO_NEXTPROTONEG const char *sender; int slen; -#endif if (s->state & SSL_ST_ACCEPT) i=SSL3_CHANGE_CIPHER_SERVER_READ; @@ -1408,7 +1450,12 @@ int ssl3_do_change_cipher_spec(SSL *s) if (s->s3->tmp.key_block == NULL) { - if (s->session == NULL) + if (s->session->master_key_length == 0) + { + SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS); + return (0); + } + if (s->session == NULL) { /* might happen if dtls1_read_bytes() calls this */ SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY); @@ -1422,7 +1469,6 @@ int ssl3_do_change_cipher_spec(SSL *s) if (!s->method->ssl3_enc->change_cipher_state(s,i)) return(0); -#ifdef OPENSSL_NO_NEXTPROTONEG /* we have to record the message digest at * this point so we can get it before we read * the finished message */ @@ -1437,9 +1483,14 @@ int ssl3_do_change_cipher_spec(SSL *s) slen=s->method->ssl3_enc->client_finished_label_len; } - s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s, + i = s->method->ssl3_enc->final_finish_mac(s, sender,slen,s->s3->tmp.peer_finish_md); -#endif + if (i == 0) + { + SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC, ERR_R_INTERNAL_ERROR); + return 0; + } + s->s3->tmp.peer_finish_md_len = i; return(1); } @@ -1471,7 +1522,7 @@ int ssl3_dispatch_alert(SSL *s) void (*cb)(const SSL *ssl,int type,int val)=NULL; s->s3->alert_dispatch=0; - i = do_ssl3_write(s, SSL3_RT_ALERT, &s->s3->send_alert[0], 2, 0); + i = do_ssl3_write(s, SSL3_RT_ALERT, &s->s3->send_alert[0], 2, 0, 0); if (i <= 0) { s->s3->alert_dispatch=1; diff --git a/app/openssl/ssl/s3_srvr.c b/app/openssl/ssl/s3_srvr.c index 60591622..1976efa7 100644 --- a/app/openssl/ssl/s3_srvr.c +++ b/app/openssl/ssl/s3_srvr.c @@ -157,8 +157,11 @@ #include #include #include +#include +#include #include #include +#include #include #ifndef OPENSSL_NO_DH #include @@ -179,6 +182,32 @@ static const SSL_METHOD *ssl3_get_server_method(int ver) return(NULL); } +#ifndef OPENSSL_NO_SRP +static int ssl_check_srp_ext_ClientHello(SSL *s, int *al) + { + int ret = SSL_ERROR_NONE; + + *al = SSL_AD_UNRECOGNIZED_NAME; + + if ((s->s3->tmp.new_cipher->algorithm_mkey & SSL_kSRP) && + (s->srp_ctx.TLS_ext_srp_username_callback != NULL)) + { + if(s->srp_ctx.login == NULL) + { + /* RFC 5054 says SHOULD reject, + we do so if There is no srp login name */ + ret = SSL3_AL_FATAL; + *al = SSL_AD_UNKNOWN_PSK_IDENTITY; + } + else + { + ret = SSL_srp_server_param_with_username(s,al); + } + } + return ret; + } +#endif + IMPLEMENT_ssl3_meth_func(SSLv3_server_method, ssl3_accept, ssl_undefined_function, @@ -188,6 +217,7 @@ int ssl3_accept(SSL *s) { BUF_MEM *buf; unsigned long alg_k,Time=(unsigned long)time(NULL); + unsigned long alg_a; void (*cb)(const SSL *ssl,int type,int val)=NULL; int ret= -1; int new_state,state,skip=0; @@ -211,6 +241,18 @@ int ssl3_accept(SSL *s) return(-1); } +#ifndef OPENSSL_NO_HEARTBEATS + /* If we're awaiting a HeartbeatResponse, pretend we + * already got and don't await it anymore, because + * Heartbeats don't make sense during handshakes anyway. + */ + if (s->tlsext_hb_pending) + { + s->tlsext_hb_pending = 0; + s->tlsext_hb_seq++; + } +#endif + for (;;) { state=s->state; @@ -218,7 +260,7 @@ int ssl3_accept(SSL *s) switch (s->state) { case SSL_ST_RENEGOTIATE: - s->new_session=1; + s->renegotiate=1; /* s->state=SSL_ST_ACCEPT; */ case SSL_ST_BEFORE: @@ -258,6 +300,7 @@ int ssl3_accept(SSL *s) } s->init_num=0; + s->s3->flags &= ~SSL3_FLAGS_SGC_RESTART_DONE; if (s->state != SSL_ST_RENEGOTIATE) { @@ -313,10 +356,35 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CLNT_HELLO_C: s->shutdown=0; - ret=ssl3_get_client_hello(s); - if (ret <= 0) goto end; + if (s->rwstate != SSL_X509_LOOKUP) + { + ret=ssl3_get_client_hello(s); + if (ret <= 0) goto end; + } +#ifndef OPENSSL_NO_SRP + { + int al; + if ((ret = ssl_check_srp_ext_ClientHello(s,&al)) < 0) + { + /* callback indicates firther work to be done */ + s->rwstate=SSL_X509_LOOKUP; + goto end; + } + if (ret != SSL_ERROR_NONE) + { + ssl3_send_alert(s,SSL3_AL_FATAL,al); + /* This is not really an error but the only means to + for a client to detect whether srp is supported. */ + if (al != TLS1_AD_UNKNOWN_PSK_IDENTITY) + SSLerr(SSL_F_SSL3_ACCEPT,SSL_R_CLIENTHELLO_TLSEXT); + ret = SSL_TLSEXT_ERR_ALERT_FATAL; + ret= -1; + goto end; + } + } +#endif - s->new_session = 2; + s->renegotiate = 2; s->state=SSL3_ST_SW_SRVR_HELLO_A; s->init_num=0; break; @@ -345,9 +413,11 @@ int ssl3_accept(SSL *s) case SSL3_ST_SW_CERT_A: case SSL3_ST_SW_CERT_B: /* Check if it is anon DH or anon ECDH, */ - /* normal PSK or KRB5 */ + /* non-RSA PSK or KRB5 or SRP */ if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) - && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK) + /* Among PSK ciphersuites only RSA_PSK uses server certificate */ + && !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK && + !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA)) && !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aKRB5)) { ret=ssl3_send_server_certificate(s); @@ -376,6 +446,7 @@ int ssl3_accept(SSL *s) case SSL3_ST_SW_KEY_EXCH_A: case SSL3_ST_SW_KEY_EXCH_B: alg_k = s->s3->tmp.new_cipher->algorithm_mkey; + alg_a = s->s3->tmp.new_cipher->algorithm_auth; /* clear this, it may get reset by * send_server_key_exchange */ @@ -405,10 +476,16 @@ int ssl3_accept(SSL *s) * public key for key exchange. */ if (s->s3->tmp.use_rsa_tmp - /* PSK: send ServerKeyExchange if PSK identity - * hint if provided */ + /* PSK: send ServerKeyExchange if either: + * - PSK identity hint is provided, or + * - the key exchange is kEECDH. + */ #ifndef OPENSSL_NO_PSK - || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint) + || ((alg_a & SSL_aPSK) && ((alg_k & SSL_kEECDH) || s->session->psk_identity_hint)) +#endif +#ifndef OPENSSL_NO_SRP + /* SRP: send ServerKeyExchange */ + || (alg_k & SSL_kSRP) #endif || (alg_k & (SSL_kDHr|SSL_kDHd|SSL_kEDH)) || (alg_k & SSL_kEECDH) @@ -456,6 +533,9 @@ int ssl3_accept(SSL *s) skip=1; s->s3->tmp.cert_request=0; s->state=SSL3_ST_SW_SRVR_DONE_A; + if (s->s3->handshake_buffer) + if (!ssl3_digest_cached_records(s)) + return -1; } else { @@ -538,15 +618,26 @@ int ssl3_accept(SSL *s) * the client uses its key from the certificate * for key exchange. */ -#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG) - s->state=SSL3_ST_SR_FINISHED_A; -#else - if (s->s3->next_proto_neg_seen) - s->state=SSL3_ST_SR_NEXT_PROTO_A; - else - s->state=SSL3_ST_SR_FINISHED_A; -#endif s->init_num = 0; + s->state=SSL3_ST_SR_POST_CLIENT_CERT; + } + else if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + s->state=SSL3_ST_SR_CERT_VRFY_A; + s->init_num=0; + if (!s->session->peer) + break; + /* For TLS v1.2 freeze the handshake buffer + * at this point and digest cached records. + */ + if (!s->s3->handshake_buffer) + { + SSLerr(SSL_F_SSL3_ACCEPT,ERR_R_INTERNAL_ERROR); + return -1; + } + s->s3->flags |= TLS1_FLAGS_KEEP_HANDSHAKE; + if (!ssl3_digest_cached_records(s)) + return -1; } else { @@ -585,19 +676,33 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CERT_VRFY_B: /* we should decide if we expected this one */ + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret=ssl3_get_cert_verify(s); if (ret <= 0) goto end; -#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG) - s->state=SSL3_ST_SR_FINISHED_A; -#else - if (s->s3->next_proto_neg_seen) + s->state=SSL3_ST_SR_POST_CLIENT_CERT; + s->init_num=0; + break; + + case SSL3_ST_SR_POST_CLIENT_CERT: { + char next_proto_neg = 0; + char channel_id = 0; +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) + next_proto_neg = s->s3->next_proto_neg_seen; +# endif + channel_id = s->s3->tlsext_channel_id_valid; +#endif + + s->s3->flags |= SSL3_FLAGS_CCS_OK; + if (next_proto_neg) s->state=SSL3_ST_SR_NEXT_PROTO_A; + else if (channel_id) + s->state=SSL3_ST_SR_CHANNEL_ID_A; else s->state=SSL3_ST_SR_FINISHED_A; -#endif - s->init_num=0; break; + } #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) case SSL3_ST_SR_NEXT_PROTO_A: @@ -605,6 +710,19 @@ int ssl3_accept(SSL *s) ret=ssl3_get_next_proto(s); if (ret <= 0) goto end; s->init_num = 0; + if (s->s3->tlsext_channel_id_valid) + s->state=SSL3_ST_SR_CHANNEL_ID_A; + else + s->state=SSL3_ST_SR_FINISHED_A; + break; +#endif + +#if !defined(OPENSSL_NO_TLSEXT) + case SSL3_ST_SR_CHANNEL_ID_A: + case SSL3_ST_SR_CHANNEL_ID_B: + ret=ssl3_get_channel_id(s); + if (ret <= 0) goto end; + s->init_num = 0; s->state=SSL3_ST_SR_FINISHED_A; break; #endif @@ -614,14 +732,11 @@ int ssl3_accept(SSL *s) ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A, SSL3_ST_SR_FINISHED_B); if (ret <= 0) goto end; -#ifndef OPENSSL_NO_TLSEXT - if (s->tlsext_ticket_expected) - s->state=SSL3_ST_SW_SESSION_TICKET_A; - else if (s->hit) - s->state=SSL_ST_OK; -#else if (s->hit) s->state=SSL_ST_OK; +#ifndef OPENSSL_NO_TLSEXT + else if (s->tlsext_ticket_expected) + s->state=SSL3_ST_SW_SESSION_TICKET_A; #endif else s->state=SSL3_ST_SW_CHANGE_A; @@ -679,16 +794,7 @@ int ssl3_accept(SSL *s) if (ret <= 0) goto end; s->state=SSL3_ST_SW_FLUSH; if (s->hit) - { -#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG) - s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A; -#else - if (s->s3->next_proto_neg_seen) - s->s3->tmp.next_state=SSL3_ST_SR_NEXT_PROTO_A; - else - s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A; -#endif - } + s->s3->tmp.next_state=SSL3_ST_SR_POST_CLIENT_CERT; else s->s3->tmp.next_state=SSL_ST_OK; s->init_num=0; @@ -706,11 +812,9 @@ int ssl3_accept(SSL *s) s->init_num=0; - if (s->new_session == 2) /* skipped if we just sent a HelloRequest */ + if (s->renegotiate == 2) /* skipped if we just sent a HelloRequest */ { - /* actually not necessarily a 'new' session unless - * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is set */ - + s->renegotiate=0; s->new_session=0; ssl_update_cache(s,SSL_SESS_CACHE_SERVER); @@ -800,6 +904,13 @@ int ssl3_check_client_hello(SSL *s) s->s3->tmp.reuse_message = 1; if (s->s3->tmp.message_type == SSL3_MT_CLIENT_HELLO) { + /* We only allow the client to restart the handshake once per + * negotiation. */ + if (s->s3->flags & SSL3_FLAGS_SGC_RESTART_DONE) + { + SSLerr(SSL_F_SSL3_CHECK_CLIENT_HELLO, SSL_R_MULTIPLE_SGC_RESTARTS); + return -1; + } /* Throw away what we have done so far in the current handshake, * which will now be aborted. (A full SSL_clear would be too much.) */ #ifndef OPENSSL_NO_DH @@ -816,6 +927,7 @@ int ssl3_check_client_hello(SSL *s) s->s3->tmp.ecdh = NULL; } #endif + s->s3->flags |= SSL3_FLAGS_SGC_RESTART_DONE; return 2; } return 1; @@ -840,7 +952,8 @@ int ssl3_get_client_hello(SSL *s) * If we are SSLv3, we will respond with SSLv3, even if prompted with * TLSv1. */ - if (s->state == SSL3_ST_SR_CLNT_HELLO_A) + if (s->state == SSL3_ST_SR_CLNT_HELLO_A + ) { s->state=SSL3_ST_SR_CLNT_HELLO_B; } @@ -865,7 +978,8 @@ int ssl3_get_client_hello(SSL *s) (s->version != DTLS1_VERSION && s->client_version < s->version)) { SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_WRONG_VERSION_NUMBER); - if ((s->client_version>>8) == SSL3_VERSION_MAJOR) + if ((s->client_version>>8) == SSL3_VERSION_MAJOR && + !s->enc_write_ctx && !s->write_hash) { /* similar to ssl3_get_record, send alert using remote version number */ s->version = s->client_version; @@ -897,13 +1011,16 @@ int ssl3_get_client_hello(SSL *s) j= *(p++); s->hit=0; - /* Versions before 0.9.7 always allow session reuse during renegotiation - * (i.e. when s->new_session is true), option - * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is new with 0.9.7. - * Maybe this optional behaviour should always have been the default, - * but we cannot safely change the default behaviour (or new applications - * might be written that become totally unsecure when compiled with - * an earlier library version) + /* Versions before 0.9.7 always allow clients to resume sessions in renegotiation. + * 0.9.7 and later allow this by default, but optionally ignore resumption requests + * with flag SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION (it's a new flag rather + * than a change to default behavior so that applications relying on this for security + * won't even compile against older library versions). + * + * 1.0.1 and later also have a function SSL_renegotiate_abbreviated() to request + * renegotiation but not a new session (s->new_session remains unset): for servers, + * this essentially just means that the SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION + * setting will be ignored. */ if ((s->new_session && (s->options & SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION))) { @@ -1099,7 +1216,7 @@ int ssl3_get_client_hello(SSL *s) goto f_err; } } - if (ssl_check_clienthello_tlsext(s) <= 0) { + if (ssl_check_clienthello_tlsext_early(s) <= 0) { SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO,SSL_R_CLIENTHELLO_TLSEXT); goto err; } @@ -1109,12 +1226,9 @@ int ssl3_get_client_hello(SSL *s) * server_random before calling tls_session_secret_cb in order to allow * SessionTicket processing to use it in key derivation. */ { - unsigned long Time; unsigned char *pos; - Time=(unsigned long)time(NULL); /* Time */ pos=s->s3->server_random; - l2n(Time,pos); - if (RAND_pseudo_bytes(pos,SSL3_RANDOM_SIZE-4) <= 0) + if (ssl_fill_hello_random(s, 1, pos, SSL3_RANDOM_SIZE) <= 0) { al=SSL_AD_INTERNAL_ERROR; goto f_err; @@ -1304,8 +1418,14 @@ int ssl3_get_client_hello(SSL *s) s->s3->tmp.new_cipher=s->session->cipher; } - if (!ssl3_digest_cached_records(s)) - goto f_err; + if (TLS1_get_version(s) < TLS1_2_VERSION || !(s->verify_mode & SSL_VERIFY_PEER)) + { + if (!ssl3_digest_cached_records(s)) + { + al = SSL_AD_INTERNAL_ERROR; + goto f_err; + } + } /* we now have the following setup. * client_random @@ -1318,6 +1438,16 @@ int ssl3_get_client_hello(SSL *s) * s->tmp.new_cipher - the new cipher to use. */ + /* Handles TLS extensions that we couldn't check earlier */ + if (s->version >= SSL3_VERSION) + { + if (ssl_check_clienthello_tlsext_late(s) <= 0) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT); + goto err; + } + } + if (ret < 0) ret=1; if (0) { @@ -1335,19 +1465,13 @@ int ssl3_send_server_hello(SSL *s) unsigned char *p,*d; int i,sl; unsigned long l; -#ifdef OPENSSL_NO_TLSEXT - unsigned long Time; -#endif if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { buf=(unsigned char *)s->init_buf->data; #ifdef OPENSSL_NO_TLSEXT p=s->s3->server_random; - /* Generate server_random if it was not needed previously */ - Time=(unsigned long)time(NULL); /* Time */ - l2n(Time,p); - if (RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-4) <= 0) + if (ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE) <= 0) return -1; #endif /* Do the message type and length last */ @@ -1360,20 +1484,20 @@ int ssl3_send_server_hello(SSL *s) memcpy(p,s->s3->server_random,SSL3_RANDOM_SIZE); p+=SSL3_RANDOM_SIZE; - /* now in theory we have 3 options to sending back the - * session id. If it is a re-use, we send back the - * old session-id, if it is a new session, we send - * back the new session-id or we send back a 0 length - * session-id if we want it to be single use. - * Currently I will not implement the '0' length session-id - * 12-Jan-98 - I'll now support the '0' length stuff. - * - * We also have an additional case where stateless session - * resumption is successful: we always send back the old - * session id. In this case s->hit is non zero: this can - * only happen if stateless session resumption is succesful - * if session caching is disabled so existing functionality - * is unaffected. + /* There are several cases for the session ID to send + * back in the server hello: + * - For session reuse from the session cache, + * we send back the old session ID. + * - If stateless session reuse (using a session ticket) + * is successful, we send back the client's "session ID" + * (which doesn't actually identify the session). + * - If it is a new session, we send back the new + * session ID. + * - However, if we want the new session to be single-use, + * we send back a 0-length session ID. + * s->hit is non-zero in either case of session reuse, + * so the following won't overwrite an ID that we're supposed + * to send back. */ if (!(s->ctx->session_cache_mode & SSL_SESS_CACHE_SERVER) && !s->hit) @@ -1472,11 +1596,17 @@ int ssl3_send_server_key_exchange(SSL *s) int encodedlen = 0; int curve_id = 0; BN_CTX *bn_ctx = NULL; +#endif +#ifndef OPENSSL_NO_PSK + const char* psk_identity_hint; + size_t psk_identity_hint_len; #endif EVP_PKEY *pkey; + const EVP_MD *md = NULL; unsigned char *p,*d; int al,i; - unsigned long type; + unsigned long alg_k; + unsigned long alg_a; int n; CERT *cert; BIGNUM *r[4]; @@ -1487,15 +1617,28 @@ int ssl3_send_server_key_exchange(SSL *s) EVP_MD_CTX_init(&md_ctx); if (s->state == SSL3_ST_SW_KEY_EXCH_A) { - type=s->s3->tmp.new_cipher->algorithm_mkey; + alg_k=s->s3->tmp.new_cipher->algorithm_mkey; + alg_a=s->s3->tmp.new_cipher->algorithm_auth; cert=s->cert; buf=s->init_buf; r[0]=r[1]=r[2]=r[3]=NULL; n=0; +#ifndef OPENSSL_NO_PSK + if (alg_a & SSL_aPSK) + { + /* size for PSK identity hint */ + psk_identity_hint = s->session->psk_identity_hint; + if (psk_identity_hint) + psk_identity_hint_len = strlen(psk_identity_hint); + else + psk_identity_hint_len = 0; + n+=2+psk_identity_hint_len; + } +#endif /* !OPENSSL_NO_PSK */ #ifndef OPENSSL_NO_RSA - if (type & SSL_kRSA) + if (alg_k & SSL_kRSA) { rsa=cert->rsa_tmp; if ((rsa == NULL) && (s->cert->rsa_tmp_cb != NULL)) @@ -1522,10 +1665,9 @@ int ssl3_send_server_key_exchange(SSL *s) r[1]=rsa->e; s->s3->tmp.use_rsa_tmp=1; } - else #endif #ifndef OPENSSL_NO_DH - if (type & SSL_kEDH) + else if (alg_k & SSL_kEDH) { dhp=cert->dh_tmp; if ((dhp == NULL) && (s->cert->dh_tmp_cb != NULL)) @@ -1578,10 +1720,9 @@ int ssl3_send_server_key_exchange(SSL *s) r[1]=dh->g; r[2]=dh->pub_key; } - else #endif #ifndef OPENSSL_NO_ECDH - if (type & SSL_kEECDH) + else if (alg_k & SSL_kEECDH) { const EC_GROUP *group; @@ -1694,7 +1835,7 @@ int ssl3_send_server_key_exchange(SSL *s) * to encode the entire ServerECDHParams * structure. */ - n = 4 + encodedlen; + n += 4 + encodedlen; /* We'll generate the serverKeyExchange message * explicitly so we can set these to NULLs @@ -1704,31 +1845,46 @@ int ssl3_send_server_key_exchange(SSL *s) r[2]=NULL; r[3]=NULL; } - else #endif /* !OPENSSL_NO_ECDH */ -#ifndef OPENSSL_NO_PSK - if (type & SSL_kPSK) +#ifndef OPENSSL_NO_SRP + else if (alg_k & SSL_kSRP) + { + if ((s->srp_ctx.N == NULL) || + (s->srp_ctx.g == NULL) || + (s->srp_ctx.s == NULL) || + (s->srp_ctx.B == NULL)) { - /* reserve size for record length and PSK identity hint*/ - n+=2+strlen(s->ctx->psk_identity_hint); + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,SSL_R_MISSING_SRP_PARAM); + goto err; } - else -#endif /* !OPENSSL_NO_PSK */ + r[0]=s->srp_ctx.N; + r[1]=s->srp_ctx.g; + r[2]=s->srp_ctx.s; + r[3]=s->srp_ctx.B; + } +#endif + else if (!(alg_k & SSL_kPSK)) { al=SSL_AD_HANDSHAKE_FAILURE; SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE); goto f_err; } - for (i=0; r[i] != NULL; i++) + for (i=0; i < 4 && r[i] != NULL; i++) { nr[i]=BN_num_bytes(r[i]); +#ifndef OPENSSL_NO_SRP + if ((i == 2) && (alg_k & SSL_kSRP)) + n+=1+nr[i]; + else +#endif n+=2+nr[i]; } - if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) - && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) + if (!(alg_a & SSL_aNULL) + /* Among PSK ciphersuites only RSA uses a certificate */ + && !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA))) { - if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher)) + if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher,&md)) == NULL) { al=SSL_AD_DECODE_ERROR; @@ -1750,15 +1906,39 @@ int ssl3_send_server_key_exchange(SSL *s) d=(unsigned char *)s->init_buf->data; p= &(d[4]); - for (i=0; r[i] != NULL; i++) + for (i=0; i < 4 && r[i] != NULL; i++) { +#ifndef OPENSSL_NO_SRP + if ((i == 2) && (alg_k & SSL_kSRP)) + { + *p = nr[i]; + p++; + } + else +#endif s2n(nr[i],p); BN_bn2bin(r[i],p); p+=nr[i]; } +/* Note: ECDHE PSK ciphersuites use SSL_kEECDH and SSL_aPSK. + * When one of them is used, the server key exchange record needs to have both + * the psk_identity_hint and the ServerECDHParams. */ +#ifndef OPENSSL_NO_PSK + if (alg_a & SSL_aPSK) + { + /* copy PSK identity hint (if provided) */ + s2n(psk_identity_hint_len, p); + if (psk_identity_hint_len > 0) + { + memcpy(p, psk_identity_hint, psk_identity_hint_len); + p+=psk_identity_hint_len; + } + } +#endif /* OPENSSL_NO_PSK */ + #ifndef OPENSSL_NO_ECDH - if (type & SSL_kEECDH) + if (alg_k & SSL_kEECDH) { /* XXX: For now, we only support named (not generic) curves. * In this situation, the serverKeyExchange message has: @@ -1781,17 +1961,7 @@ int ssl3_send_server_key_exchange(SSL *s) encodedPoint = NULL; p += encodedlen; } -#endif - -#ifndef OPENSSL_NO_PSK - if (type & SSL_kPSK) - { - /* copy PSK identity hint */ - s2n(strlen(s->ctx->psk_identity_hint), p); - strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint)); - p+=strlen(s->ctx->psk_identity_hint); - } -#endif +#endif /* OPENSSL_NO_ECDH */ /* not anonymous */ if (pkey != NULL) @@ -1799,12 +1969,15 @@ int ssl3_send_server_key_exchange(SSL *s) /* n is the length of the params, they start at &(d[4]) * and p points to the space at the end. */ #ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA) + if (pkey->type == EVP_PKEY_RSA + && TLS1_get_version(s) < TLS1_2_VERSION) { q=md_buf; j=0; for (num=2; num > 0; num--) { + EVP_MD_CTX_set_flags(&md_ctx, + EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); EVP_DigestInit_ex(&md_ctx,(num == 2) ?s->ctx->md5:s->ctx->sha1, NULL); EVP_DigestUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); @@ -1825,45 +1998,42 @@ int ssl3_send_server_key_exchange(SSL *s) n+=u+2; } else -#endif -#if !defined(OPENSSL_NO_DSA) - if (pkey->type == EVP_PKEY_DSA) +#endif /* OPENSSL_NO_RSA */ + if (md) { - /* lets do DSS */ - EVP_SignInit_ex(&md_ctx,EVP_dss1(), NULL); - EVP_SignUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx,&(s->s3->server_random[0]),SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx,&(d[4]),n); - if (!EVP_SignFinal(&md_ctx,&(p[2]), - (unsigned int *)&i,pkey)) + /* For TLS1.2 and later send signature + * algorithm */ + if (TLS1_get_version(s) >= TLS1_2_VERSION) { - SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_LIB_DSA); - goto err; + if (!tls12_get_sigandhash(p, pkey, md)) + { + /* Should never happen */ + al=SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); + goto f_err; + } + p+=2; } - s2n(i,p); - n+=i+2; - } - else +#ifdef SSL_DEBUG + fprintf(stderr, "Using hash %s\n", + EVP_MD_name(md)); #endif -#if !defined(OPENSSL_NO_ECDSA) - if (pkey->type == EVP_PKEY_EC) - { - /* let's do ECDSA */ - EVP_SignInit_ex(&md_ctx,EVP_ecdsa(), NULL); + EVP_SignInit_ex(&md_ctx, md, NULL); EVP_SignUpdate(&md_ctx,&(s->s3->client_random[0]),SSL3_RANDOM_SIZE); EVP_SignUpdate(&md_ctx,&(s->s3->server_random[0]),SSL3_RANDOM_SIZE); EVP_SignUpdate(&md_ctx,&(d[4]),n); if (!EVP_SignFinal(&md_ctx,&(p[2]), (unsigned int *)&i,pkey)) { - SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_LIB_ECDSA); + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_LIB_EVP); goto err; } s2n(i,p); n+=i+2; + if (TLS1_get_version(s) >= TLS1_2_VERSION) + n+= 2; } else -#endif { /* Is this error check actually needed? */ al=SSL_AD_HANDSHAKE_FAILURE; @@ -1916,6 +2086,14 @@ int ssl3_send_certificate_request(SSL *s) p+=n; n++; + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + nl = tls12_get_req_sig_algs(s, p + 2); + s2n(nl, p); + p += nl + 2; + n += nl + 2; + } + off=n; p+=2; n+=2; @@ -1989,6 +2167,7 @@ int ssl3_get_client_key_exchange(SSL *s) int i,al,ok; long n; unsigned long alg_k; + unsigned long alg_a; unsigned char *p; #ifndef OPENSSL_NO_RSA RSA *rsa=NULL; @@ -2006,7 +2185,11 @@ int ssl3_get_client_key_exchange(SSL *s) EC_KEY *srvr_ecdh = NULL; EVP_PKEY *clnt_pub_pkey = NULL; EC_POINT *clnt_ecpoint = NULL; - BN_CTX *bn_ctx = NULL; + BN_CTX *bn_ctx = NULL; +#ifndef OPENSSL_NO_PSK + unsigned int psk_len = 0; + unsigned char psk[PSK_MAX_PSK_LEN]; +#endif /* OPENSSL_NO_PSK */ #endif n=s->method->ssl_get_message(s, @@ -2020,7 +2203,95 @@ int ssl3_get_client_key_exchange(SSL *s) p=(unsigned char *)s->init_msg; alg_k=s->s3->tmp.new_cipher->algorithm_mkey; + alg_a=s->s3->tmp.new_cipher->algorithm_auth; + +#ifndef OPENSSL_NO_PSK + if (alg_a & SSL_aPSK) + { + unsigned char *t = NULL; + unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4]; + unsigned int pre_ms_len = 0; + int psk_err = 1; + char tmp_id[PSK_MAX_IDENTITY_LEN+1]; + + al=SSL_AD_HANDSHAKE_FAILURE; + n2s(p, i); + if (n != i+2 && !(alg_k & SSL_kEECDH)) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_LENGTH_MISMATCH); + goto psk_err; + } + if (i > PSK_MAX_IDENTITY_LEN) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_DATA_LENGTH_TOO_LONG); + goto psk_err; + } + if (s->psk_server_callback == NULL) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_PSK_NO_SERVER_CB); + goto psk_err; + } + + /* Create guaranteed NUL-terminated identity + * string for the callback */ + memcpy(tmp_id, p, i); + memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i); + psk_len = s->psk_server_callback(s, tmp_id, psk, sizeof(psk)); + + if (psk_len > PSK_MAX_PSK_LEN) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto psk_err; + } + else if (psk_len == 0) + { + /* PSK related to the given identity not found */ + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_PSK_IDENTITY_NOT_FOUND); + al=SSL_AD_UNKNOWN_PSK_IDENTITY; + goto psk_err; + } + if (!(alg_k & SSL_kEECDH)) + { + /* Create the shared secret now if we're not using ECDHE-PSK.*/ + pre_ms_len=2+psk_len+2+psk_len; + t = pre_ms; + s2n(psk_len, t); + memset(t, 0, psk_len); + t+=psk_len; + s2n(psk_len, t); + memcpy(t, psk, psk_len); + + s->session->master_key_length= + s->method->ssl3_enc->generate_master_secret(s, + s->session->master_key, pre_ms, pre_ms_len); + } + if (s->session->psk_identity != NULL) + OPENSSL_free(s->session->psk_identity); + s->session->psk_identity = BUF_strdup(tmp_id); + OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1); + if (s->session->psk_identity == NULL) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + ERR_R_MALLOC_FAILURE); + goto psk_err; + } + + p += i; + n -= (i + 2); + psk_err = 0; + psk_err: + OPENSSL_cleanse(pre_ms, sizeof(pre_ms)); + if (psk_err != 0) + goto f_err; + } +#endif /* OPENSSL_NO_PSK */ + if (0) {} #ifndef OPENSSL_NO_RSA if (alg_k & SSL_kRSA) { @@ -2125,10 +2396,9 @@ int ssl3_get_client_key_exchange(SSL *s) p,i); OPENSSL_cleanse(p,i); } - else #endif #ifndef OPENSSL_NO_DH - if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd)) + else if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd)) { n2s(p,i); if (n != i+2) @@ -2175,6 +2445,7 @@ int ssl3_get_client_key_exchange(SSL *s) if (i <= 0) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_DH_LIB); + BN_clear_free(pub); goto err; } @@ -2188,10 +2459,9 @@ int ssl3_get_client_key_exchange(SSL *s) s->session->master_key,p,i); OPENSSL_cleanse(p,i); } - else #endif #ifndef OPENSSL_NO_KRB5 - if (alg_k & SSL_kKRB5) + else if (alg_k & SSL_kKRB5) { krb5_error_code krb5rc; krb5_data enc_ticket; @@ -2380,17 +2650,20 @@ int ssl3_get_client_key_exchange(SSL *s) ** if (s->kssl_ctx) s->kssl_ctx = NULL; */ } - else #endif /* OPENSSL_NO_KRB5 */ - #ifndef OPENSSL_NO_ECDH - if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe)) + else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe)) { int ret = 1; int field_size = 0; const EC_KEY *tkey; const EC_GROUP *group; const BIGNUM *priv_key; +#ifndef OPENSSL_NO_PSK + unsigned char *pre_ms; + unsigned int pre_ms_len; + unsigned char *t; +#endif /* OPENSSL_NO_PSK */ /* initialize structures for server's ECDH key pair */ if ((srvr_ecdh = EC_KEY_new()) == NULL) @@ -2486,7 +2759,7 @@ int ssl3_get_client_key_exchange(SSL *s) } /* Get encoded point length */ - i = *p; + i = *p; p += 1; if (n != 1 + i) { @@ -2528,185 +2801,155 @@ int ssl3_get_client_key_exchange(SSL *s) EC_KEY_free(srvr_ecdh); BN_CTX_free(bn_ctx); EC_KEY_free(s->s3->tmp.ecdh); - s->s3->tmp.ecdh = NULL; + s->s3->tmp.ecdh = NULL; - /* Compute the master secret */ - s->session->master_key_length = s->method->ssl3_enc-> \ - generate_master_secret(s, s->session->master_key, p, i); - - OPENSSL_cleanse(p, i); - return (ret); - } - else -#endif #ifndef OPENSSL_NO_PSK - if (alg_k & SSL_kPSK) + /* ECDHE PSK ciphersuites from RFC 5489 */ + if ((alg_a & SSL_aPSK) && psk_len != 0) { - unsigned char *t = NULL; - unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4]; - unsigned int pre_ms_len = 0, psk_len = 0; - int psk_err = 1; - char tmp_id[PSK_MAX_IDENTITY_LEN+1]; - - al=SSL_AD_HANDSHAKE_FAILURE; - - n2s(p,i); - if (n != i+2) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - SSL_R_LENGTH_MISMATCH); - goto psk_err; - } - if (i > PSK_MAX_IDENTITY_LEN) + pre_ms_len = 2+psk_len+2+i; + pre_ms = OPENSSL_malloc(pre_ms_len); + if (pre_ms == NULL) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - SSL_R_DATA_LENGTH_TOO_LONG); - goto psk_err; - } - if (s->psk_server_callback == NULL) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_NO_SERVER_CB); - goto psk_err; + ERR_R_MALLOC_FAILURE); + goto err; } + memset(pre_ms, 0, pre_ms_len); + t = pre_ms; + s2n(psk_len, t); + memcpy(t, psk, psk_len); + t += psk_len; + s2n(i, t); + memcpy(t, p, i); + s->session->master_key_length = s->method->ssl3_enc \ + -> generate_master_secret(s, + s->session->master_key, pre_ms, pre_ms_len); + OPENSSL_cleanse(pre_ms, pre_ms_len); + OPENSSL_free(pre_ms); + } +#endif /* OPENSSL_NO_PSK */ + if (!(alg_a & SSL_aPSK)) + { + /* Compute the master secret */ + s->session->master_key_length = s->method->ssl3_enc \ + -> generate_master_secret(s, + s->session->master_key, p, i); + } - /* Create guaranteed NULL-terminated identity - * string for the callback */ - memcpy(tmp_id, p, i); - memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i); - psk_len = s->psk_server_callback(s, tmp_id, - psk_or_pre_ms, sizeof(psk_or_pre_ms)); - OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1); + OPENSSL_cleanse(p, i); + } +#endif +#ifndef OPENSSL_NO_SRP + else if (alg_k & SSL_kSRP) + { + int param_len; - if (psk_len > PSK_MAX_PSK_LEN) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto psk_err; - } - else if (psk_len == 0) - { - /* PSK related to the given identity not found */ - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_IDENTITY_NOT_FOUND); - al=SSL_AD_UNKNOWN_PSK_IDENTITY; - goto psk_err; - } + n2s(p,i); + param_len=i+2; + if (param_len > n) + { + al=SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_BAD_SRP_A_LENGTH); + goto f_err; + } + if (!(s->srp_ctx.A=BN_bin2bn(p,i,NULL))) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_BN_LIB); + goto err; + } + if (s->session->srp_username != NULL) + OPENSSL_free(s->session->srp_username); + s->session->srp_username = BUF_strdup(s->srp_ctx.login); + if (s->session->srp_username == NULL) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + ERR_R_MALLOC_FAILURE); + goto err; + } - /* create PSK pre_master_secret */ - pre_ms_len=2+psk_len+2+psk_len; - t = psk_or_pre_ms; - memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len); - s2n(psk_len, t); - memset(t, 0, psk_len); - t+=psk_len; - s2n(psk_len, t); + if ((s->session->master_key_length = SRP_generate_server_master_secret(s,s->session->master_key))<0) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR); + goto err; + } - if (s->session->psk_identity != NULL) - OPENSSL_free(s->session->psk_identity); - s->session->psk_identity = BUF_strdup((char *)p); - if (s->session->psk_identity == NULL) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; - } + p+=i; + } +#endif /* OPENSSL_NO_SRP */ + else if (alg_k & SSL_kGOST) + { + int ret = 0; + EVP_PKEY_CTX *pkey_ctx; + EVP_PKEY *client_pub_pkey = NULL, *pk = NULL; + unsigned char premaster_secret[32], *start; + size_t outlen=32, inlen; + unsigned long alg_a; - if (s->session->psk_identity_hint != NULL) - OPENSSL_free(s->session->psk_identity_hint); - s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint); - if (s->ctx->psk_identity_hint != NULL && - s->session->psk_identity_hint == NULL) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; - } + /* Get our certificate private key*/ + alg_a = s->s3->tmp.new_cipher->algorithm_auth; + if (alg_a & SSL_aGOST94) + pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey; + else if (alg_a & SSL_aGOST01) + pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey; - s->session->master_key_length= - s->method->ssl3_enc->generate_master_secret(s, - s->session->master_key, psk_or_pre_ms, pre_ms_len); - psk_err = 0; - psk_err: - OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms)); - if (psk_err != 0) - goto f_err; + pkey_ctx = EVP_PKEY_CTX_new(pk,NULL); + EVP_PKEY_decrypt_init(pkey_ctx); + /* If client certificate is present and is of the same type, maybe + * use it for key exchange. Don't mind errors from + * EVP_PKEY_derive_set_peer, because it is completely valid to use + * a client certificate for authorization only. */ + client_pub_pkey = X509_get_pubkey(s->session->peer); + if (client_pub_pkey) + { + if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0) + ERR_clear_error(); + } + /* Decrypt session key */ + if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED))) + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); + goto gerr; + } + if (p[1] == 0x81) + { + start = p+3; + inlen = p[2]; + } + else if (p[1] < 0x80) + { + start = p+2; + inlen = p[1]; } else -#endif - if (alg_k & SSL_kGOST) { - int ret = 0; - EVP_PKEY_CTX *pkey_ctx; - EVP_PKEY *client_pub_pkey = NULL, *pk = NULL; - unsigned char premaster_secret[32], *start; - size_t outlen=32, inlen; - unsigned long alg_a; - - /* Get our certificate private key*/ - alg_a = s->s3->tmp.new_cipher->algorithm_auth; - if (alg_a & SSL_aGOST94) - pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey; - else if (alg_a & SSL_aGOST01) - pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey; - - pkey_ctx = EVP_PKEY_CTX_new(pk,NULL); - EVP_PKEY_decrypt_init(pkey_ctx); - /* If client certificate is present and is of the same type, maybe - * use it for key exchange. Don't mind errors from - * EVP_PKEY_derive_set_peer, because it is completely valid to use - * a client certificate for authorization only. */ - client_pub_pkey = X509_get_pubkey(s->session->peer); - if (client_pub_pkey) - { - if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0) - ERR_clear_error(); - } - /* Decrypt session key */ - if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED))) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); - goto gerr; - } - if (p[1] == 0x81) - { - start = p+3; - inlen = p[2]; - } - else if (p[1] < 0x80) - { - start = p+2; - inlen = p[1]; - } - else - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); - goto gerr; - } - if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0) + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); + goto gerr; + } + if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0) - { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); - goto gerr; - } - /* Generate master secret */ - s->session->master_key_length= - s->method->ssl3_enc->generate_master_secret(s, - s->session->master_key,premaster_secret,32); - /* Check if pubkey from client certificate was used */ - if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0) - ret = 2; - else - ret = 1; - gerr: - EVP_PKEY_free(client_pub_pkey); - EVP_PKEY_CTX_free(pkey_ctx); - if (ret) - return ret; - else - goto err; + { + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED); + goto gerr; } + /* Generate master secret */ + s->session->master_key_length= + s->method->ssl3_enc->generate_master_secret(s, + s->session->master_key,premaster_secret,32); + /* Check if pubkey from client certificate was used */ + if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0) + ret = 2; else + ret = 1; + gerr: + EVP_PKEY_free(client_pub_pkey); + EVP_PKEY_CTX_free(pkey_ctx); + if (ret) + return ret; + else + goto err; + } + else if (!(alg_k & SSL_kPSK)) { al=SSL_AD_HANDSHAKE_FAILURE; SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, @@ -2717,7 +2960,7 @@ int ssl3_get_client_key_exchange(SSL *s) return(1); f_err: ssl3_send_alert(s,SSL3_AL_FATAL,al); -#if !defined(OPENSSL_NO_DH) || !defined(OPENSSL_NO_RSA) || !defined(OPENSSL_NO_ECDH) +#if !defined(OPENSSL_NO_DH) || !defined(OPENSSL_NO_RSA) || !defined(OPENSSL_NO_ECDH) || defined(OPENSSL_NO_SRP) err: #endif #ifndef OPENSSL_NO_ECDH @@ -2738,12 +2981,15 @@ int ssl3_get_cert_verify(SSL *s) long n; int type=0,i,j; X509 *peer; + const EVP_MD *md = NULL; + EVP_MD_CTX mctx; + EVP_MD_CTX_init(&mctx); n=s->method->ssl_get_message(s, SSL3_ST_SR_CERT_VRFY_A, SSL3_ST_SR_CERT_VRFY_B, -1, - 514, /* 514? */ + 516, /* Enough for 4096 bit RSA key with TLS v1.2 */ &ok); if (!ok) return((int)n); @@ -2763,7 +3009,7 @@ int ssl3_get_cert_verify(SSL *s) if (s->s3->tmp.message_type != SSL3_MT_CERTIFICATE_VERIFY) { s->s3->tmp.reuse_message=1; - if ((peer != NULL) && (type | EVP_PKT_SIGN)) + if ((peer != NULL) && (type & EVP_PKT_SIGN)) { al=SSL_AD_UNEXPECTED_MESSAGE; SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_MISSING_VERIFY_MESSAGE); @@ -2806,6 +3052,36 @@ int ssl3_get_cert_verify(SSL *s) } else { + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + int sigalg = tls12_get_sigid(pkey); + /* Should never happen */ + if (sigalg == -1) + { + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,ERR_R_INTERNAL_ERROR); + al=SSL_AD_INTERNAL_ERROR; + goto f_err; + } + /* Check key type is consistent with signature */ + if (sigalg != (int)p[1]) + { + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_WRONG_SIGNATURE_TYPE); + al=SSL_AD_DECODE_ERROR; + goto f_err; + } + md = tls12_get_hash(p[0]); + if (md == NULL) + { + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_UNKNOWN_DIGEST); + al=SSL_AD_DECODE_ERROR; + goto f_err; + } +#ifdef SSL_DEBUG +fprintf(stderr, "USING TLSv1.2 HASH %s\n", EVP_MD_name(md)); +#endif + p += 2; + n -= 2; + } n2s(p,i); n-=2; if (i > n) @@ -2823,6 +3099,37 @@ int ssl3_get_cert_verify(SSL *s) goto f_err; } + if (TLS1_get_version(s) >= TLS1_2_VERSION) + { + long hdatalen = 0; + void *hdata; + hdatalen = BIO_get_mem_data(s->s3->handshake_buffer, &hdata); + if (hdatalen <= 0) + { + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, ERR_R_INTERNAL_ERROR); + al=SSL_AD_INTERNAL_ERROR; + goto f_err; + } +#ifdef SSL_DEBUG + fprintf(stderr, "Using TLS 1.2 with client verify alg %s\n", + EVP_MD_name(md)); +#endif + if (!EVP_VerifyInit_ex(&mctx, md, NULL) + || !EVP_VerifyUpdate(&mctx, hdata, hdatalen)) + { + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, ERR_R_EVP_LIB); + al=SSL_AD_INTERNAL_ERROR; + goto f_err; + } + + if (EVP_VerifyFinal(&mctx, p , i, pkey) <= 0) + { + al=SSL_AD_DECRYPT_ERROR; + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_BAD_SIGNATURE); + goto f_err; + } + } + else #ifndef OPENSSL_NO_RSA if (pkey->type == EVP_PKEY_RSA) { @@ -2913,6 +3220,13 @@ f_err: ssl3_send_alert(s,SSL3_AL_FATAL,al); } end: + if (s->s3->handshake_buffer) + { + BIO_free(s->s3->handshake_buffer); + s->s3->handshake_buffer = NULL; + s->s3->flags &= ~TLS1_FLAGS_KEEP_HANDSHAKE; + } + EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_free(pkey); return(ret); } @@ -3025,6 +3339,12 @@ int ssl3_get_client_certificate(SSL *s) al=SSL_AD_HANDSHAKE_FAILURE; goto f_err; } + /* No client certificate so digest cached records */ + if (s->s3->handshake_buffer && !ssl3_digest_cached_records(s)) + { + al=SSL_AD_INTERNAL_ERROR; + goto f_err; + } } else { @@ -3101,13 +3421,17 @@ int ssl3_send_server_certificate(SSL *s) /* SSL3_ST_SW_CERT_B */ return(ssl3_do_write(s,SSL3_RT_HANDSHAKE)); } + #ifndef OPENSSL_NO_TLSEXT +/* send a new session ticket (not necessarily for a new session) */ int ssl3_send_newsession_ticket(SSL *s) { if (s->state == SSL3_ST_SW_SESSION_TICKET_A) { unsigned char *p, *senc, *macstart; - int len, slen; + const unsigned char *const_p; + int len, slen_full, slen; + SSL_SESSION *sess; unsigned int hlen; EVP_CIPHER_CTX ctx; HMAC_CTX hctx; @@ -3116,12 +3440,38 @@ int ssl3_send_newsession_ticket(SSL *s) unsigned char key_name[16]; /* get session encoding length */ - slen = i2d_SSL_SESSION(s->session, NULL); + slen_full = i2d_SSL_SESSION(s->session, NULL); /* Some length values are 16 bits, so forget it if session is * too long */ - if (slen > 0xFF00) + if (slen_full > 0xFF00) return -1; + senc = OPENSSL_malloc(slen_full); + if (!senc) + return -1; + p = senc; + i2d_SSL_SESSION(s->session, &p); + + /* create a fresh copy (not shared with other threads) to clean up */ + const_p = senc; + sess = d2i_SSL_SESSION(NULL, &const_p, slen_full); + if (sess == NULL) + { + OPENSSL_free(senc); + return -1; + } + sess->session_id_length = 0; /* ID is irrelevant for the ticket */ + + slen = i2d_SSL_SESSION(sess, NULL); + if (slen > slen_full) /* shouldn't ever happen */ + { + OPENSSL_free(senc); + return -1; + } + p = senc; + i2d_SSL_SESSION(sess, &p); + SSL_SESSION_free(sess); + /* Grow buffer if need be: the length calculation is as * follows 1 (size of message name) + 3 (message length * bytes) + 4 (ticket lifetime hint) + 2 (ticket length) + @@ -3133,11 +3483,6 @@ int ssl3_send_newsession_ticket(SSL *s) 26 + EVP_MAX_IV_LENGTH + EVP_MAX_BLOCK_LENGTH + EVP_MAX_MD_SIZE + slen)) return -1; - senc = OPENSSL_malloc(slen); - if (!senc) - return -1; - p = senc; - i2d_SSL_SESSION(s->session, &p); p=(unsigned char *)s->init_buf->data; /* do the header */ @@ -3168,7 +3513,13 @@ int ssl3_send_newsession_ticket(SSL *s) tlsext_tick_md(), NULL); memcpy(key_name, tctx->tlsext_tick_key_name, 16); } - l2n(s->session->tlsext_tick_lifetime_hint, p); + + /* Ticket lifetime hint (advisory only): + * We leave this unspecified for resumed session (for simplicity), + * and guess that tickets for new sessions will live as long + * as their sessions. */ + l2n(s->hit ? 0 : s->session->timeout, p); + /* Skip ticket length for now */ p += 2; /* Output key name */ @@ -3244,13 +3595,13 @@ int ssl3_send_cert_status(SSL *s) return(ssl3_do_write(s,SSL3_RT_HANDSHAKE)); } -# ifndef OPENSSL_NO_NPN +# ifndef OPENSSL_NO_NEXTPROTONEG /* ssl3_get_next_proto reads a Next Protocol Negotiation handshake message. It * sets the next_proto member in s if found */ int ssl3_get_next_proto(SSL *s) { int ok; - unsigned proto_len, padding_len; + int proto_len, padding_len; long n; const unsigned char *p; @@ -3311,4 +3662,140 @@ int ssl3_get_next_proto(SSL *s) return 1; } # endif + +/* ssl3_get_channel_id reads and verifies a ClientID handshake message. */ +int ssl3_get_channel_id(SSL *s) + { + int ret = -1, ok; + long n; + const unsigned char *p; + unsigned short extension_type, extension_len; + EC_GROUP* p256 = NULL; + EC_KEY* key = NULL; + EC_POINT* point = NULL; + ECDSA_SIG sig; + BIGNUM x, y; + + if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0) + { + /* The first time that we're called we take the current + * handshake hash and store it. */ + EVP_MD_CTX md_ctx; + unsigned int len; + + EVP_MD_CTX_init(&md_ctx); + EVP_DigestInit_ex(&md_ctx, EVP_sha256(), NULL); + if (!tls1_channel_id_hash(&md_ctx, s)) + return -1; + len = sizeof(s->s3->tlsext_channel_id); + EVP_DigestFinal(&md_ctx, s->s3->tlsext_channel_id, &len); + EVP_MD_CTX_cleanup(&md_ctx); + } + + n = s->method->ssl_get_message(s, + SSL3_ST_SR_CHANNEL_ID_A, + SSL3_ST_SR_CHANNEL_ID_B, + SSL3_MT_ENCRYPTED_EXTENSIONS, + 2 + 2 + TLSEXT_CHANNEL_ID_SIZE, + &ok); + + if (!ok) + return((int)n); + + ssl3_finish_mac(s, (unsigned char*)s->init_buf->data, s->init_num + 4); + + /* s->state doesn't reflect whether ChangeCipherSpec has been received + * in this handshake, but s->s3->change_cipher_spec does (will be reset + * by ssl3_get_finished). */ + if (!s->s3->change_cipher_spec) + { + SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS); + return -1; + } + + if (n != 2 + 2 + TLSEXT_CHANNEL_ID_SIZE) + { + SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE); + return -1; + } + + p = (unsigned char *)s->init_msg; + + /* The payload looks like: + * uint16 extension_type + * uint16 extension_len; + * uint8 x[32]; + * uint8 y[32]; + * uint8 r[32]; + * uint8 s[32]; + */ + n2s(p, extension_type); + n2s(p, extension_len); + + if (extension_type != TLSEXT_TYPE_channel_id || + extension_len != TLSEXT_CHANNEL_ID_SIZE) + { + SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE); + return -1; + } + + p256 = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + if (!p256) + { + SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_NO_P256_SUPPORT); + return -1; + } + + BN_init(&x); + BN_init(&y); + sig.r = BN_new(); + sig.s = BN_new(); + + if (BN_bin2bn(p + 0, 32, &x) == NULL || + BN_bin2bn(p + 32, 32, &y) == NULL || + BN_bin2bn(p + 64, 32, sig.r) == NULL || + BN_bin2bn(p + 96, 32, sig.s) == NULL) + goto err; + + point = EC_POINT_new(p256); + if (!point || + !EC_POINT_set_affine_coordinates_GFp(p256, point, &x, &y, NULL)) + goto err; + + key = EC_KEY_new(); + if (!key || + !EC_KEY_set_group(key, p256) || + !EC_KEY_set_public_key(key, point)) + goto err; + + /* We stored the handshake hash in |tlsext_channel_id| the first time + * that we were called. */ + switch (ECDSA_do_verify(s->s3->tlsext_channel_id, SHA256_DIGEST_LENGTH, &sig, key)) { + case 1: + break; + case 0: + SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_CHANNEL_ID_SIGNATURE_INVALID); + s->s3->tlsext_channel_id_valid = 0; + goto err; + default: + s->s3->tlsext_channel_id_valid = 0; + goto err; + } + + memcpy(s->s3->tlsext_channel_id, p, 64); + ret = 1; + +err: + BN_free(&x); + BN_free(&y); + BN_free(sig.r); + BN_free(sig.s); + if (key) + EC_KEY_free(key); + if (point) + EC_POINT_free(point); + if (p256) + EC_GROUP_free(p256); + return ret; + } #endif diff --git a/app/openssl/ssl/srtp.h b/app/openssl/ssl/srtp.h new file mode 100644 index 00000000..c0cf33ef --- /dev/null +++ b/app/openssl/ssl/srtp.h @@ -0,0 +1,145 @@ +/* ssl/tls1.h */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ +/* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* + DTLS code by Eric Rescorla + + Copyright (C) 2006, Network Resonance, Inc. + Copyright (C) 2011, RTFM, Inc. +*/ + +#ifndef HEADER_D1_SRTP_H +#define HEADER_D1_SRTP_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#define SRTP_AES128_CM_SHA1_80 0x0001 +#define SRTP_AES128_CM_SHA1_32 0x0002 +#define SRTP_AES128_F8_SHA1_80 0x0003 +#define SRTP_AES128_F8_SHA1_32 0x0004 +#define SRTP_NULL_SHA1_80 0x0005 +#define SRTP_NULL_SHA1_32 0x0006 + +int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles); +int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles); +SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); + +STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl); +SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/app/openssl/ssl/ssl.h b/app/openssl/ssl/ssl.h index 9cb2bf93..54b0eb6c 100644 --- a/app/openssl/ssl/ssl.h +++ b/app/openssl/ssl/ssl.h @@ -252,6 +252,7 @@ extern "C" { #define SSL_TXT_kEECDH "kEECDH" #define SSL_TXT_kPSK "kPSK" #define SSL_TXT_kGOST "kGOST" +#define SSL_TXT_kSRP "kSRP" #define SSL_TXT_aRSA "aRSA" #define SSL_TXT_aDSS "aDSS" @@ -275,6 +276,7 @@ extern "C" { #define SSL_TXT_ECDSA "ECDSA" #define SSL_TXT_KRB5 "KRB5" #define SSL_TXT_PSK "PSK" +#define SSL_TXT_SRP "SRP" #define SSL_TXT_DES "DES" #define SSL_TXT_3DES "3DES" @@ -285,6 +287,7 @@ extern "C" { #define SSL_TXT_AES128 "AES128" #define SSL_TXT_AES256 "AES256" #define SSL_TXT_AES "AES" +#define SSL_TXT_AES_GCM "AESGCM" #define SSL_TXT_CAMELLIA128 "CAMELLIA128" #define SSL_TXT_CAMELLIA256 "CAMELLIA256" #define SSL_TXT_CAMELLIA "CAMELLIA" @@ -294,10 +297,14 @@ extern "C" { #define SSL_TXT_SHA "SHA" /* same as "SHA1" */ #define SSL_TXT_GOST94 "GOST94" #define SSL_TXT_GOST89MAC "GOST89MAC" +#define SSL_TXT_SHA256 "SHA256" +#define SSL_TXT_SHA384 "SHA384" #define SSL_TXT_SSLV2 "SSLv2" #define SSL_TXT_SSLV3 "SSLv3" #define SSL_TXT_TLSV1 "TLSv1" +#define SSL_TXT_TLSV1_1 "TLSv1.1" +#define SSL_TXT_TLSV1_2 "TLSv1.2" #define SSL_TXT_EXP "EXP" #define SSL_TXT_EXPORT "EXPORT" @@ -356,9 +363,29 @@ extern "C" { * in SSL_CTX. */ typedef struct ssl_st *ssl_crock_st; typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; +typedef struct ssl_method_st SSL_METHOD; +typedef struct ssl_cipher_st SSL_CIPHER; +typedef struct ssl_session_st SSL_SESSION; + +DECLARE_STACK_OF(SSL_CIPHER) + +/* SRTP protection profiles for use with the use_srtp extension (RFC 5764)*/ +typedef struct srtp_protection_profile_st + { + const char *name; + unsigned long id; + } SRTP_PROTECTION_PROFILE; + +DECLARE_STACK_OF(SRTP_PROTECTION_PROFILE) + +typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, int len, void *arg); +typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, STACK_OF(SSL_CIPHER) *peer_ciphers, SSL_CIPHER **cipher, void *arg); + + +#ifndef OPENSSL_NO_SSL_INTERN /* used to hold info on the particular ciphers used */ -typedef struct ssl_cipher_st +struct ssl_cipher_st { int valid; const char *name; /* text name */ @@ -375,15 +402,11 @@ typedef struct ssl_cipher_st unsigned long algorithm2; /* Extra flags */ int strength_bits; /* Number of bits really used */ int alg_bits; /* Number of bits for algorithm */ - } SSL_CIPHER; - -DECLARE_STACK_OF(SSL_CIPHER) + }; -typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, int len, void *arg); -typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, STACK_OF(SSL_CIPHER) *peer_ciphers, SSL_CIPHER **cipher, void *arg); /* Used to hold functions for SSLv2 or SSLv3/TLSv1 functions */ -typedef struct ssl_method_st +struct ssl_method_st { int version; int (*ssl_new)(SSL *s); @@ -416,7 +439,7 @@ typedef struct ssl_method_st int (*ssl_version)(void); long (*ssl_callback_ctrl)(SSL *s, int cb_id, void (*fp)(void)); long (*ssl_ctx_callback_ctrl)(SSL_CTX *s, int cb_id, void (*fp)(void)); - } SSL_METHOD; + }; /* Lets make this into an ASN.1 type structure as follows * SSL_SESSION_ID ::= SEQUENCE { @@ -433,14 +456,17 @@ typedef struct ssl_method_st * Session_ID_context [ 4 ] EXPLICIT OCTET STRING, -- the Session ID context * Verify_result [ 5 ] EXPLICIT INTEGER, -- X509_V_... code for `Peer' * HostName [ 6 ] EXPLICIT OCTET STRING, -- optional HostName from servername TLS extension - * ECPointFormatList [ 7 ] OCTET STRING, -- optional EC point format list from TLS extension - * PSK_identity_hint [ 8 ] EXPLICIT OCTET STRING, -- optional PSK identity hint - * PSK_identity [ 9 ] EXPLICIT OCTET STRING -- optional PSK identity + * PSK_identity_hint [ 7 ] EXPLICIT OCTET STRING, -- optional PSK identity hint + * PSK_identity [ 8 ] EXPLICIT OCTET STRING, -- optional PSK identity + * Ticket_lifetime_hint [9] EXPLICIT INTEGER, -- server's lifetime hint for session ticket + * Ticket [10] EXPLICIT OCTET STRING, -- session ticket (clients only) + * Compression_meth [11] EXPLICIT OCTET STRING, -- optional compression method + * SRP_username [ 12 ] EXPLICIT OCTET STRING -- optional SRP username * } * Look in ssl/ssl_asn1.c for more details * I'm using EXPLICIT tags so I can read the damn things using asn1parse :-). */ -typedef struct ssl_session_st +struct ssl_session_st { int ssl_version; /* what ssl version session info is * being kept in here? */ @@ -467,6 +493,9 @@ typedef struct ssl_session_st char *psk_identity_hint; char *psk_identity; #endif + /* Used to indicate that session resumption is not allowed. + * Applications can also set this bit for a new session via + * not_resumable_session_cb to disable session caching and tickets. */ int not_resumable; /* The cert is the certificate used to establish this connection */ @@ -509,11 +538,15 @@ typedef struct ssl_session_st #endif /* OPENSSL_NO_EC */ /* RFC4507 info */ unsigned char *tlsext_tick; /* Session ticket */ - size_t tlsext_ticklen; /* Session ticket length */ + size_t tlsext_ticklen; /* Session ticket length */ long tlsext_tick_lifetime_hint; /* Session lifetime hint in seconds */ #endif - } SSL_SESSION; +#ifndef OPENSSL_NO_SRP + char *srp_username; +#endif + }; +#endif #define SSL_OP_MICROSOFT_SESS_ID_BUG 0x00000001L #define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x00000002L @@ -522,21 +555,28 @@ typedef struct ssl_session_st #define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L #define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L -#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x00000040L /* no effect since 0.9.7h and 0.9.8b */ +#define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L #define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L #define SSL_OP_TLS_D5_BUG 0x00000100L #define SSL_OP_TLS_BLOCK_PADDING_BUG 0x00000200L -/* Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added - * in OpenSSL 0.9.6d. Usually (depending on the application protocol) - * the workaround is not needed. Unfortunately some broken SSL/TLS - * implementations cannot handle it at all, which is why we include - * it in SSL_OP_ALL. */ +/* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */ +#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 + +/* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the + * insertion of empty records in CBC mode, but the empty records were commonly + * misinterpreted as EOF by other TLS stacks and so this was disabled by + * SSL_OP_ALL. + * + * This has been replaced by 1/n-1 record splitting, which is enabled by + * SSL_MODE_CBC_RECORD_SPLITTING in SSL_set_mode. This involves sending a + * one-byte record rather than an empty record and has much better + * compatibility. */ #define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS 0x00000800L /* added in 0.9.6e */ /* SSL_OP_ALL: various bug workarounds that should be rather harmless. * This used to be 0x000FFFFFL before 0.9.7. */ -#define SSL_OP_ALL 0x80000FFFL +#define SSL_OP_ALL 0x80000BFFL /* DTLS options */ #define SSL_OP_NO_QUERY_MTU 0x00001000L @@ -572,11 +612,17 @@ typedef struct ssl_session_st #define SSL_OP_NO_SSLv2 0x01000000L #define SSL_OP_NO_SSLv3 0x02000000L #define SSL_OP_NO_TLSv1 0x04000000L +#define SSL_OP_NO_TLSv1_2 0x08000000L +#define SSL_OP_NO_TLSv1_1 0x10000000L +/* These next two were never actually used for anything since SSLeay + * zap so we have some more flags. + */ /* The next flag deliberately changes the ciphertest, this is a check * for the PKCS#1 attack */ -#define SSL_OP_PKCS1_CHECK_1 0x08000000L -#define SSL_OP_PKCS1_CHECK_2 0x10000000L +#define SSL_OP_PKCS1_CHECK_1 0x0 +#define SSL_OP_PKCS1_CHECK_2 0x0 + #define SSL_OP_NETSCAPE_CA_DN_BUG 0x20000000L #define SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG 0x40000000L /* Make server add server-hello extension from early version of @@ -602,13 +648,21 @@ typedef struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). */ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L -/* Use small read and write buffers: (a) lazy allocate read buffers for - * large incoming records, and (b) limit the size of outgoing records. */ -#define SSL_MODE_SMALL_BUFFERS 0x00000020L +/* Send the current time in the Random fields of the ClientHello and + * ServerHello records for compatibility with hypothetical implementations + * that require it. + */ +#define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L +#define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L /* When set, clients may send application data before receipt of CCS * and Finished. This mode enables full-handshakes to 'complete' in * one RTT. */ -#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000040L +#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L +/* When set, TLS 1.0 and SSLv3, multi-byte, CBC records will be split in two: + * the first record will contain a single byte and the second will contain the + * rest of the bytes. This effectively randomises the IV and prevents BEAST + * attacks. */ +#define SSL_MODE_CBC_RECORD_SPLITTING 0x00000100L /* Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, * they cannot be used to clear bits. */ @@ -644,12 +698,53 @@ typedef struct ssl_session_st #define SSL_get_secure_renegotiation_support(ssl) \ SSL_ctrl((ssl), SSL_CTRL_GET_RI_SUPPORT, 0, NULL) +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_heartbeat(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_TLS_EXT_SEND_HEARTBEAT,0,NULL) +#endif + void SSL_CTX_set_msg_callback(SSL_CTX *ctx, void (*cb)(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)); void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)); #define SSL_CTX_set_msg_callback_arg(ctx, arg) SSL_CTX_ctrl((ctx), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) #define SSL_set_msg_callback_arg(ssl, arg) SSL_ctrl((ssl), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) +#ifndef OPENSSL_NO_SRP + +#ifndef OPENSSL_NO_SSL_INTERN + +typedef struct srp_ctx_st + { + /* param for all the callbacks */ + void *SRP_cb_arg; + /* set client Hello login callback */ + int (*TLS_ext_srp_username_callback)(SSL *, int *, void *); + /* set SRP N/g param callback for verification */ + int (*SRP_verify_param_callback)(SSL *, void *); + /* set SRP client passwd callback */ + char *(*SRP_give_srp_client_pwd_callback)(SSL *, void *); + + char *login; + BIGNUM *N,*g,*s,*B,*A; + BIGNUM *a,*b,*v; + char *info; + int strength; + + unsigned long srp_Mask; + } SRP_CTX; + +#endif + +/* see tls_srp.c */ +int SSL_SRP_CTX_init(SSL *s); +int SSL_CTX_SRP_CTX_init(SSL_CTX *ctx); +int SSL_SRP_CTX_free(SSL *ctx); +int SSL_CTX_SRP_CTX_free(SSL_CTX *ctx); +int SSL_srp_server_param_with_username(SSL *s, int *ad); +int SRP_generate_server_master_secret(SSL *s,unsigned char *master_key); +int SRP_Calc_A_param(SSL *s); +int SRP_generate_client_master_secret(SSL *s,unsigned char *master_key); +#endif #if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WIN32) #define SSL_MAX_CERT_LIST_DEFAULT 1024*30 /* 30k max cert list :-) */ @@ -675,7 +770,11 @@ void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int con typedef int (*GEN_SESSION_CB)(const SSL *ssl, unsigned char *id, unsigned int *id_len); -typedef struct ssl_comp_st +typedef struct ssl_comp_st SSL_COMP; + +#ifndef OPENSSL_NO_SSL_INTERN + +struct ssl_comp_st { int id; const char *name; @@ -684,7 +783,7 @@ typedef struct ssl_comp_st #else char *method; #endif - } SSL_COMP; + }; DECLARE_STACK_OF(SSL_COMP) DECLARE_LHASH_OF(SSL_SESSION); @@ -829,7 +928,7 @@ struct ssl_ctx_st */ unsigned int max_send_fragment; -#ifndef OPENSSL_ENGINE +#ifndef OPENSSL_NO_ENGINE /* Engine to pass requests for client certs to */ ENGINE *client_cert_engine; @@ -857,6 +956,28 @@ struct ssl_ctx_st /* draft-rescorla-tls-opaque-prf-input-00.txt information */ int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg); void *tlsext_opaque_prf_input_callback_arg; +#endif + +#ifndef OPENSSL_NO_PSK + char *psk_identity_hint; + unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, + unsigned int max_identity_len, unsigned char *psk, + unsigned int max_psk_len); + unsigned int (*psk_server_callback)(SSL *ssl, const char *identity, + unsigned char *psk, unsigned int max_psk_len); +#endif + +#ifndef OPENSSL_NO_BUF_FREELISTS +#define SSL_MAX_BUF_FREELIST_LEN_DEFAULT 32 + unsigned int freelist_max_len; + struct ssl3_buf_freelist_st *wbuf_freelist; + struct ssl3_buf_freelist_st *rbuf_freelist; +#endif +#ifndef OPENSSL_NO_SRP + SRP_CTX srp_ctx; /* ctx for SRP authentication */ +#endif + +#ifndef OPENSSL_NO_TLSEXT # ifndef OPENSSL_NO_NEXTPROTONEG /* Next protocol negotiation information */ @@ -876,24 +997,43 @@ struct ssl_ctx_st void *arg); void *next_proto_select_cb_arg; # endif -#endif -#ifndef OPENSSL_NO_PSK - char *psk_identity_hint; - unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, - unsigned int max_identity_len, unsigned char *psk, - unsigned int max_psk_len); - unsigned int (*psk_server_callback)(SSL *ssl, const char *identity, - unsigned char *psk, unsigned int max_psk_len); + /* ALPN information + * (we are in the process of transitioning from NPN to ALPN.) */ + + /* For a server, this contains a callback function that allows the + * server to select the protocol for the connection. + * out: on successful return, this must point to the raw protocol + * name (without the length prefix). + * outlen: on successful return, this contains the length of |*out|. + * in: points to the client's list of supported protocols in + * wire-format. + * inlen: the length of |in|. */ + int (*alpn_select_cb)(SSL *s, + const unsigned char **out, + unsigned char *outlen, + const unsigned char* in, + unsigned int inlen, + void *arg); + void *alpn_select_cb_arg; + + /* For a client, this contains the list of supported protocols in wire + * format. */ + unsigned char* alpn_client_proto_list; + unsigned alpn_client_proto_list_len; + + /* SRTP profiles we are willing to do from RFC 5764 */ + STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles; + + /* If true, a client will advertise the Channel ID extension and a + * server will echo it. */ + char tlsext_channel_id_enabled; + /* The client's Channel ID private key. */ + EVP_PKEY *tlsext_channel_id_private; #endif + }; -#ifndef OPENSSL_NO_BUF_FREELISTS -#define SSL_MAX_BUF_FREELIST_LEN_DEFAULT 32 - unsigned int freelist_max_len; - struct ssl3_buf_freelist_st *wbuf_freelist; - struct ssl3_buf_freelist_st *rbuf_freelist; #endif - }; #define SSL_SESS_CACHE_OFF 0x0000 #define SSL_SESS_CACHE_CLIENT 0x0001 @@ -931,6 +1071,10 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx); SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL) #define SSL_CTX_sess_cache_full(ctx) \ SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL) +/* SSL_CTX_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_CTX_enable_tls_channel_id(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, int (*new_session_cb)(struct ssl_st *ssl,SSL_SESSION *sess)); int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx))(struct ssl_st *ssl, SSL_SESSION *sess); @@ -952,26 +1096,43 @@ void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s, int (*cb) (SSL *ssl, const unsigned char **out, unsigned int *outlen, - void *arg), void *arg); + void *arg), + void *arg); void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, - int (*cb) (SSL *ssl, unsigned char **out, + int (*cb) (SSL *ssl, + unsigned char **out, unsigned char *outlen, const unsigned char *in, - unsigned int inlen, void *arg), + unsigned int inlen, + void *arg), void *arg); int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, const unsigned char *client, unsigned int client_len); -void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, - unsigned *len); +void SSL_get0_next_proto_negotiated(const SSL *s, + const unsigned char **data, unsigned *len); #define OPENSSL_NPN_UNSUPPORTED 0 #define OPENSSL_NPN_NEGOTIATED 1 #define OPENSSL_NPN_NO_OVERLAP 2 - #endif +int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char* protos, + unsigned protos_len); +int SSL_set_alpn_protos(SSL *ssl, const unsigned char* protos, + unsigned protos_len); +void SSL_CTX_set_alpn_select_cb(SSL_CTX* ctx, + int (*cb) (SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg), + void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned *len); + #ifndef OPENSSL_NO_PSK /* the maximum length of the buffer given to callbacks containing the * resulting identity/psk */ @@ -1011,6 +1172,8 @@ const char *SSL_get_psk_identity(const SSL *s); #define SSL_MAC_FLAG_READ_MAC_STREAM 1 #define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 +#ifndef OPENSSL_NO_SSL_INTERN + struct ssl_st { /* protocol version @@ -1055,9 +1218,7 @@ struct ssl_st int server; /* are we the server side? - mostly used by SSL_clear*/ - int new_session;/* 1 if we are to use a new session. - * 2 if we are a server and are inside a handshake - * (i.e. not just sending a HelloRequest) + int new_session;/* Generate a new session or reuse an old one. * NB: For servers, the 'new' session may actually be a previously * cached session or even the previous session unless * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is set */ @@ -1154,6 +1315,10 @@ struct ssl_st #endif /* OPENSSL_NO_KRB5 */ #ifndef OPENSSL_NO_PSK + /* PSK identity hint is stored here only to enable setting a hint on an SSL object before an + * SSL_SESSION is associated with it. Once an SSL_SESSION is associated with this SSL object, + * the psk_identity_hint from the session takes precedence over this one. */ + char *psk_identity_hint; unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity, unsigned int max_identity_len, unsigned char *psk, unsigned int max_psk_len); @@ -1244,11 +1409,44 @@ struct ssl_st #endif #define session_ctx initial_ctx + + STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles; /* What we'll do */ + SRTP_PROTECTION_PROFILE *srtp_profile; /* What's been chosen */ + + unsigned int tlsext_heartbeat; /* Is use of the Heartbeat extension negotiated? + 0: disabled + 1: enabled + 2: enabled, but not allowed to send Requests + */ + unsigned int tlsext_hb_pending; /* Indicates if a HeartbeatRequest is in flight */ + unsigned int tlsext_hb_seq; /* HeartbeatRequest sequence number */ + + /* Copied from the SSL_CTX. For a server, means that we'll accept + * Channel IDs from clients. For a client, means that we'll advertise + * support. */ + char tlsext_channel_id_enabled; + /* The client's Channel ID private key. */ + EVP_PKEY *tlsext_channel_id_private; + + /* For a client, this contains the list of supported protocols in wire + * format. */ + unsigned char* alpn_client_proto_list; + unsigned alpn_client_proto_list_len; #else #define session_ctx ctx #endif /* OPENSSL_NO_TLSEXT */ + + int renegotiate;/* 1 if we are renegotiating. + * 2 if we are a server and are inside a handshake + * (i.e. not just sending a HelloRequest) */ + +#ifndef OPENSSL_NO_SRP + SRP_CTX srp_ctx; /* ctx for SRP authentication */ +#endif }; +#endif + #ifdef __cplusplus } #endif @@ -1258,6 +1456,7 @@ struct ssl_st #include /* This is mostly sslv3 with a few tweaks */ #include /* Datagram TLS */ #include +#include /* Support for the use_srtp extension */ #ifdef __cplusplus extern "C" { @@ -1304,7 +1503,7 @@ extern "C" { #define SSL_get_state(a) SSL_state(a) #define SSL_is_init_finished(a) (SSL_state(a) == SSL_ST_OK) #define SSL_in_init(a) ((SSL_state(a)&SSL_ST_INIT) && \ - !SSL_cutthrough_complete(a)) + !SSL_cutthrough_complete(a)) #define SSL_in_before(a) (SSL_state(a)&SSL_ST_BEFORE) #define SSL_in_connect_init(a) (SSL_state(a)&SSL_ST_CONNECT) #define SSL_in_accept_init(a) (SSL_state(a)&SSL_ST_ACCEPT) @@ -1476,6 +1675,23 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP 71 #define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72 + +#define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB 75 +#define SSL_CTRL_SET_SRP_VERIFY_PARAM_CB 76 +#define SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB 77 + +#define SSL_CTRL_SET_SRP_ARG 78 +#define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME 79 +#define SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH 80 +#define SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD 81 +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_CTRL_TLS_EXT_SEND_HEARTBEAT 85 +#define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING 86 +#define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS 87 +#endif +#define SSL_CTRL_CHANNEL_ID 88 +#define SSL_CTRL_GET_CHANNEL_ID 89 +#define SSL_CTRL_SET_CHANNEL_ID 90 #endif #define DTLS_CTRL_GET_TIMEOUT 73 @@ -1486,6 +1702,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_CTRL_CLEAR_OPTIONS 77 #define SSL_CTRL_CLEAR_MODE 78 +#define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 +#define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 + #define DTLSv1_get_timeout(ssl, arg) \ SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg) #define DTLSv1_handle_timeout(ssl) \ @@ -1520,8 +1739,31 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) #define SSL_set_tmp_ecdh(ssl,ecdh) \ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh) +/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client + * IDs from clients. Returns 1 on success. */ +#define SSL_enable_tls_channel_id(ctx) \ + SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL) +/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to + * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on + * success. */ +#define SSL_set1_tls_channel_id(s, private_key) \ + SSL_ctrl(s,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key) +#define SSL_CTX_set1_tls_channel_id(ctx, private_key) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key) +/* SSL_get_tls_channel_id gets the client's TLS Channel ID from a server SSL* + * and copies up to the first |channel_id_len| bytes into |channel_id|. The + * Channel ID consists of the client's P-256 public key as an (x,y) pair where + * each is a 32-byte, big-endian field element. Returns 0 if the client didn't + * offer a Channel ID and the length of the complete Channel ID otherwise. */ +#define SSL_get_tls_channel_id(ctx, channel_id, channel_id_len) \ + SSL_ctrl(ctx,SSL_CTRL_GET_CHANNEL_ID,channel_id_len,(void*)channel_id) + #define SSL_CTX_add_extra_chain_cert(ctx,x509) \ SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509) +#define SSL_CTX_get_extra_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +#define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) #ifndef OPENSSL_NO_BIO BIO_METHOD *BIO_f_ssl(void); @@ -1549,7 +1791,8 @@ const SSL_CIPHER *SSL_get_current_cipher(const SSL *s); int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits); char * SSL_CIPHER_get_version(const SSL_CIPHER *c); const char * SSL_CIPHER_get_name(const SSL_CIPHER *c); -const char * SSL_CIPHER_authentication_method(const SSL_CIPHER *c); +unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c); +const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher); int SSL_get_fd(const SSL *s); int SSL_get_rfd(const SSL *s); @@ -1619,11 +1862,15 @@ long SSL_SESSION_set_time(SSL_SESSION *s, long t); long SSL_SESSION_get_timeout(const SSL_SESSION *s); long SSL_SESSION_set_timeout(SSL_SESSION *s, long t); void SSL_copy_session_id(SSL *to,const SSL *from); +X509 *SSL_SESSION_get0_peer(SSL_SESSION *s); +int SSL_SESSION_set1_id_context(SSL_SESSION *s,const unsigned char *sid_ctx, + unsigned int sid_ctx_len); SSL_SESSION *SSL_SESSION_new(void); const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, unsigned int *len); const char * SSL_SESSION_get_version(const SSL_SESSION *s); +unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s); #ifndef OPENSSL_NO_FP_API int SSL_SESSION_print_fp(FILE *fp,const SSL_SESSION *ses); #endif @@ -1687,6 +1934,30 @@ int SSL_set_trust(SSL *s, int trust); int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); +#ifndef OPENSSL_NO_SRP +int SSL_CTX_set_srp_username(SSL_CTX *ctx,char *name); +int SSL_CTX_set_srp_password(SSL_CTX *ctx,char *password); +int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength); +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, + char *(*cb)(SSL *,void *)); +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, + int (*cb)(SSL *,void *)); +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb)(SSL *,int *,void *)); +int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg); + +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char *info); +int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, + const char *grp); + +BIGNUM *SSL_get_srp_g(SSL *s); +BIGNUM *SSL_get_srp_N(SSL *s); + +char *SSL_get_srp_username(SSL *s); +char *SSL_get_srp_userinfo(SSL *s); +#endif + void SSL_free(SSL *ssl); int SSL_accept(SSL *ssl); int SSL_connect(SSL *ssl); @@ -1722,6 +1993,15 @@ const SSL_METHOD *TLSv1_method(void); /* TLSv1.0 */ const SSL_METHOD *TLSv1_server_method(void); /* TLSv1.0 */ const SSL_METHOD *TLSv1_client_method(void); /* TLSv1.0 */ +const SSL_METHOD *TLSv1_1_method(void); /* TLSv1.1 */ +const SSL_METHOD *TLSv1_1_server_method(void); /* TLSv1.1 */ +const SSL_METHOD *TLSv1_1_client_method(void); /* TLSv1.1 */ + +const SSL_METHOD *TLSv1_2_method(void); /* TLSv1.2 */ +const SSL_METHOD *TLSv1_2_server_method(void); /* TLSv1.2 */ +const SSL_METHOD *TLSv1_2_client_method(void); /* TLSv1.2 */ + + const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_server_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_client_method(void); /* DTLSv1.0 */ @@ -1730,6 +2010,7 @@ STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s); int SSL_do_handshake(SSL *s); int SSL_renegotiate(SSL *s); +int SSL_renegotiate_abbreviated(SSL *s); int SSL_renegotiate_pending(SSL *s); int SSL_shutdown(SSL *s); @@ -1781,6 +2062,7 @@ void SSL_set_info_callback(SSL *ssl, void (*cb)(const SSL *ssl,int type,int val)); void (*SSL_get_info_callback(const SSL *ssl))(const SSL *ssl,int type,int val); int SSL_state(const SSL *ssl); +void SSL_set_state(SSL *ssl, int state); void SSL_set_verify_result(SSL *ssl,long v); long SSL_get_verify_result(const SSL *ssl); @@ -1881,6 +2163,9 @@ int SSL_set_session_ticket_ext_cb(SSL *s, tls_session_ticket_ext_cb_fn cb, /* Pre-shared secret session resumption functions */ int SSL_set_session_secret_cb(SSL *s, tls_session_secret_cb_fn tls_session_secret_cb, void *arg); +void SSL_set_debug(SSL *s, int debug); +int SSL_cache_hit(SSL *s); + /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. @@ -1900,6 +2185,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_DTLS1_ACCEPT 246 #define SSL_F_DTLS1_ADD_CERT_TO_BUF 295 #define SSL_F_DTLS1_BUFFER_RECORD 247 +#define SSL_F_DTLS1_CHECK_TIMEOUT_NUM 316 #define SSL_F_DTLS1_CLIENT_HELLO 248 #define SSL_F_DTLS1_CONNECT 249 #define SSL_F_DTLS1_ENC 250 @@ -1908,6 +2194,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_DTLS1_GET_MESSAGE_FRAGMENT 253 #define SSL_F_DTLS1_GET_RECORD 254 #define SSL_F_DTLS1_HANDLE_TIMEOUT 297 +#define SSL_F_DTLS1_HEARTBEAT 305 #define SSL_F_DTLS1_OUTPUT_CERT_CHAIN 255 #define SSL_F_DTLS1_PREPROCESS_FRAGMENT 288 #define SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE 256 @@ -1957,6 +2244,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_CALLBACK_CTRL 233 #define SSL_F_SSL3_CHANGE_CIPHER_STATE 129 #define SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM 130 +#define SSL_F_SSL3_CHECK_CLIENT_HELLO 304 #define SSL_F_SSL3_CLIENT_HELLO 131 #define SSL_F_SSL3_CONNECT 132 #define SSL_F_SSL3_CTRL 213 @@ -1968,6 +2256,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_GET_CERTIFICATE_REQUEST 135 #define SSL_F_SSL3_GET_CERT_STATUS 289 #define SSL_F_SSL3_GET_CERT_VERIFY 136 +#define SSL_F_SSL3_GET_CHANNEL_ID 317 #define SSL_F_SSL3_GET_CLIENT_CERTIFICATE 137 #define SSL_F_SSL3_GET_CLIENT_HELLO 138 #define SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE 139 @@ -1975,7 +2264,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_GET_KEY_EXCHANGE 141 #define SSL_F_SSL3_GET_MESSAGE 142 #define SSL_F_SSL3_GET_NEW_SESSION_TICKET 283 -#define SSL_F_SSL3_GET_NEXT_PROTO 304 +#define SSL_F_SSL3_GET_NEXT_PROTO 306 #define SSL_F_SSL3_GET_RECORD 143 #define SSL_F_SSL3_GET_SERVER_CERTIFICATE 144 #define SSL_F_SSL3_GET_SERVER_DONE 145 @@ -1987,6 +2276,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_READ_BYTES 148 #define SSL_F_SSL3_READ_N 149 #define SSL_F_SSL3_SEND_CERTIFICATE_REQUEST 150 +#define SSL_F_SSL3_SEND_CHANNEL_ID 318 #define SSL_F_SSL3_SEND_CLIENT_CERTIFICATE 151 #define SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE 152 #define SSL_F_SSL3_SEND_CLIENT_VERIFY 153 @@ -2000,10 +2290,12 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL3_WRITE_PENDING 159 #define SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT 298 #define SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT 277 +#define SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT 307 #define SSL_F_SSL_ADD_DIR_CERT_SUBJECTS_TO_STACK 215 #define SSL_F_SSL_ADD_FILE_CERT_SUBJECTS_TO_STACK 216 #define SSL_F_SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT 299 #define SSL_F_SSL_ADD_SERVERHELLO_TLSEXT 278 +#define SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT 308 #define SSL_F_SSL_BAD_METHOD 160 #define SSL_F_SSL_BYTES_TO_CIPHER_LIST 161 #define SSL_F_SSL_CERT_DUP 221 @@ -2020,6 +2312,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_CREATE_CIPHER_LIST 166 #define SSL_F_SSL_CTRL 232 #define SSL_F_SSL_CTX_CHECK_PRIVATE_KEY 168 +#define SSL_F_SSL_CTX_MAKE_PROFILES 309 #define SSL_F_SSL_CTX_NEW 169 #define SSL_F_SSL_CTX_SET_CIPHER_LIST 269 #define SSL_F_SSL_CTX_SET_CLIENT_CERT_ENGINE 290 @@ -2042,14 +2335,17 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_GET_NEW_SESSION 181 #define SSL_F_SSL_GET_PREV_SESSION 217 #define SSL_F_SSL_GET_SERVER_SEND_CERT 182 +#define SSL_F_SSL_GET_SERVER_SEND_PKEY 317 #define SSL_F_SSL_GET_SIGN_PKEY 183 #define SSL_F_SSL_INIT_WBIO_BUFFER 184 #define SSL_F_SSL_LOAD_CLIENT_CA_FILE 185 #define SSL_F_SSL_NEW 186 #define SSL_F_SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT 300 #define SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT 302 +#define SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT 310 #define SSL_F_SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT 301 #define SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT 303 +#define SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT 311 #define SSL_F_SSL_PEEK 270 #define SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT 281 #define SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT 282 @@ -2058,6 +2354,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_RSA_PUBLIC_ENCRYPT 188 #define SSL_F_SSL_SESSION_NEW 189 #define SSL_F_SSL_SESSION_PRINT_FP 190 +#define SSL_F_SSL_SESSION_SET1_ID_CONTEXT 312 #define SSL_F_SSL_SESS_CERT_NEW 225 #define SSL_F_SSL_SET_CERT 191 #define SSL_F_SSL_SET_CIPHER_LIST 271 @@ -2071,6 +2368,7 @@ void ERR_load_SSL_strings(void); #define SSL_F_SSL_SET_TRUST 228 #define SSL_F_SSL_SET_WFD 196 #define SSL_F_SSL_SHUTDOWN 224 +#define SSL_F_SSL_SRP_CTX_INIT 313 #define SSL_F_SSL_UNDEFINED_CONST_FUNCTION 243 #define SSL_F_SSL_UNDEFINED_FUNCTION 197 #define SSL_F_SSL_UNDEFINED_VOID_FUNCTION 244 @@ -2091,6 +2389,8 @@ void ERR_load_SSL_strings(void); #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 #define SSL_F_TLS1_ENC 210 +#define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 +#define SSL_F_TLS1_HEARTBEAT 315 #define SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT 275 #define SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT 276 #define SSL_F_TLS1_PRF 284 @@ -2130,6 +2430,13 @@ void ERR_load_SSL_strings(void); #define SSL_R_BAD_RSA_MODULUS_LENGTH 121 #define SSL_R_BAD_RSA_SIGNATURE 122 #define SSL_R_BAD_SIGNATURE 123 +#define SSL_R_BAD_SRP_A_LENGTH 347 +#define SSL_R_BAD_SRP_B_LENGTH 348 +#define SSL_R_BAD_SRP_G_LENGTH 349 +#define SSL_R_BAD_SRP_N_LENGTH 350 +#define SSL_R_BAD_SRP_S_LENGTH 351 +#define SSL_R_BAD_SRTP_MKI_VALUE 352 +#define SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST 353 #define SSL_R_BAD_SSL_FILETYPE 124 #define SSL_R_BAD_SSL_SESSION_ID_LENGTH 125 #define SSL_R_BAD_STATE 126 @@ -2137,12 +2444,15 @@ void ERR_load_SSL_strings(void); #define SSL_R_BIO_NOT_SET 128 #define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG 129 #define SSL_R_BN_LIB 130 +#define SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY 376 #define SSL_R_CA_DN_LENGTH_MISMATCH 131 #define SSL_R_CA_DN_TOO_LONG 132 #define SSL_R_CCS_RECEIVED_EARLY 133 #define SSL_R_CERTIFICATE_VERIFY_FAILED 134 #define SSL_R_CERT_LENGTH_MISMATCH 135 #define SSL_R_CHALLENGE_IS_DIFFERENT 136 +#define SSL_R_CHANNEL_ID_NOT_P256 375 +#define SSL_R_CHANNEL_ID_SIGNATURE_INVALID 371 #define SSL_R_CIPHER_CODE_WRONG_LENGTH 137 #define SSL_R_CIPHER_OR_HASH_UNAVAILABLE 138 #define SSL_R_CIPHER_TABLE_SRC_ERROR 139 @@ -2155,6 +2465,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_CONNECTION_ID_IS_DIFFERENT 143 #define SSL_R_CONNECTION_TYPE_NOT_SET 144 #define SSL_R_COOKIE_MISMATCH 308 +#define SSL_R_D2I_ECDSA_SIG 379 #define SSL_R_DATA_BETWEEN_CCS_AND_FINISHED 145 #define SSL_R_DATA_LENGTH_TOO_LONG 146 #define SSL_R_DECRYPTION_FAILED 147 @@ -2168,14 +2479,18 @@ void ERR_load_SSL_strings(void); #define SSL_R_ECC_CERT_SHOULD_HAVE_RSA_SIGNATURE 322 #define SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE 323 #define SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER 310 +#define SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST 354 #define SSL_R_ENCRYPTED_LENGTH_TOO_LONG 150 #define SSL_R_ERROR_GENERATING_TMP_RSA_KEY 282 #define SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST 151 +#define SSL_R_EVP_DIGESTSIGNFINAL_FAILED 377 +#define SSL_R_EVP_DIGESTSIGNINIT_FAILED 378 #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152 #define SSL_R_EXTRA_DATA_IN_MESSAGE 153 #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154 -#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 346 -#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 347 +#define SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS 372 +#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 355 +#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 356 #define SSL_R_HTTPS_PROXY_REQUEST 155 #define SSL_R_HTTP_REQUEST 156 #define SSL_R_ILLEGAL_PADDING 283 @@ -2183,7 +2498,9 @@ void ERR_load_SSL_strings(void); #define SSL_R_INVALID_CHALLENGE_LENGTH 158 #define SSL_R_INVALID_COMMAND 280 #define SSL_R_INVALID_COMPRESSION_ALGORITHM 341 +#define SSL_R_INVALID_MESSAGE 374 #define SSL_R_INVALID_PURPOSE 278 +#define SSL_R_INVALID_SRP_USERNAME 357 #define SSL_R_INVALID_STATUS_RESPONSE 328 #define SSL_R_INVALID_TICKET_KEYS_LENGTH 325 #define SSL_R_INVALID_TRUST 279 @@ -2213,11 +2530,13 @@ void ERR_load_SSL_strings(void); #define SSL_R_MISSING_RSA_CERTIFICATE 168 #define SSL_R_MISSING_RSA_ENCRYPTING_CERT 169 #define SSL_R_MISSING_RSA_SIGNING_CERT 170 +#define SSL_R_MISSING_SRP_PARAM 358 #define SSL_R_MISSING_TMP_DH_KEY 171 #define SSL_R_MISSING_TMP_ECDH_KEY 311 #define SSL_R_MISSING_TMP_RSA_KEY 172 #define SSL_R_MISSING_TMP_RSA_PKEY 173 #define SSL_R_MISSING_VERIFY_MESSAGE 174 +#define SSL_R_MULTIPLE_SGC_RESTARTS 346 #define SSL_R_NON_SSLV2_INITIAL_PACKET 175 #define SSL_R_NO_CERTIFICATES_RETURNED 176 #define SSL_R_NO_CERTIFICATE_ASSIGNED 177 @@ -2234,6 +2553,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_NO_COMPRESSION_SPECIFIED 187 #define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER 330 #define SSL_R_NO_METHOD_SPECIFIED 188 +#define SSL_R_NO_P256_SUPPORT 373 #define SSL_R_NO_PRIVATEKEY 189 #define SSL_R_NO_PRIVATE_KEY_ASSIGNED 190 #define SSL_R_NO_PROTOCOLS_AVAILABLE 191 @@ -2241,6 +2561,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_NO_RENEGOTIATION 339 #define SSL_R_NO_REQUIRED_DIGEST 324 #define SSL_R_NO_SHARED_CIPHER 193 +#define SSL_R_NO_SRTP_PROFILES 359 #define SSL_R_NO_VERIFY_CALLBACK 194 #define SSL_R_NULL_SSL_CTX 195 #define SSL_R_NULL_SSL_METHOD_PASSED 196 @@ -2285,7 +2606,12 @@ void ERR_load_SSL_strings(void); #define SSL_R_SESSION_ID_CONTEXT_UNINITIALIZED 277 #define SSL_R_SESSION_MAY_NOT_BE_CREATED 2000 #define SSL_R_SHORT_READ 219 +#define SSL_R_SIGNATURE_ALGORITHMS_ERROR 360 #define SSL_R_SIGNATURE_FOR_NON_SIGNING_CERTIFICATE 220 +#define SSL_R_SRP_A_CALC 361 +#define SSL_R_SRTP_COULD_NOT_ALLOCATE_PROFILES 362 +#define SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG 363 +#define SSL_R_SRTP_UNKNOWN_PROTECTION_PROFILE 364 #define SSL_R_SSL23_DOING_SESSION_ID_REUSE 221 #define SSL_R_SSL2_CONNECTION_ID_TOO_LONG 299 #define SSL_R_SSL3_EXT_INVALID_ECPOINTFORMAT 321 @@ -2330,6 +2656,9 @@ void ERR_load_SSL_strings(void); #define SSL_R_TLSV1_UNRECOGNIZED_NAME 1112 #define SSL_R_TLSV1_UNSUPPORTED_EXTENSION 1110 #define SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER 232 +#define SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT 365 +#define SSL_R_TLS_HEARTBEAT_PENDING 366 +#define SSL_R_TLS_ILLEGAL_EXPORTER_LABEL 367 #define SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST 157 #define SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST 233 #define SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG 234 @@ -2351,6 +2680,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_UNKNOWN_CERTIFICATE_TYPE 247 #define SSL_R_UNKNOWN_CIPHER_RETURNED 248 #define SSL_R_UNKNOWN_CIPHER_TYPE 249 +#define SSL_R_UNKNOWN_DIGEST 368 #define SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE 250 #define SSL_R_UNKNOWN_PKEY_TYPE 251 #define SSL_R_UNKNOWN_PROTOCOL 252 @@ -2365,16 +2695,19 @@ void ERR_load_SSL_strings(void); #define SSL_R_UNSUPPORTED_PROTOCOL 258 #define SSL_R_UNSUPPORTED_SSL_VERSION 259 #define SSL_R_UNSUPPORTED_STATUS_TYPE 329 +#define SSL_R_USE_SRTP_NOT_NEGOTIATED 369 #define SSL_R_WRITE_BIO_NOT_SET 260 #define SSL_R_WRONG_CIPHER_RETURNED 261 #define SSL_R_WRONG_MESSAGE_TYPE 262 #define SSL_R_WRONG_NUMBER_OF_KEY_BITS 263 #define SSL_R_WRONG_SIGNATURE_LENGTH 264 #define SSL_R_WRONG_SIGNATURE_SIZE 265 +#define SSL_R_WRONG_SIGNATURE_TYPE 370 #define SSL_R_WRONG_SSL_VERSION 266 #define SSL_R_WRONG_VERSION_NUMBER 267 #define SSL_R_X509_LIB 268 #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269 +#define SSL_R_UNEXPECTED_CCS 388 #ifdef __cplusplus } diff --git a/app/openssl/ssl/ssl2.h b/app/openssl/ssl/ssl2.h index 99a52ea0..eb25dcb0 100644 --- a/app/openssl/ssl/ssl2.h +++ b/app/openssl/ssl/ssl2.h @@ -155,6 +155,8 @@ extern "C" { #define CERT char #endif +#ifndef OPENSSL_NO_SSL_INTERN + typedef struct ssl2_state_st { int three_byte_header; @@ -219,6 +221,8 @@ typedef struct ssl2_state_st } tmp; } SSL2_STATE; +#endif + /* SSLv2 */ /* client */ #define SSL2_ST_SEND_CLIENT_HELLO_A (0x10|SSL_ST_CONNECT) diff --git a/app/openssl/ssl/ssl3.h b/app/openssl/ssl/ssl3.h index f9268c57..f205f73d 100644 --- a/app/openssl/ssl/ssl3.h +++ b/app/openssl/ssl/ssl3.h @@ -280,9 +280,6 @@ extern "C" { #define SSL3_RT_MAX_EXTRA (16384) -/* Default buffer length used for writen records. Thus a generated record - * will contain plaintext no larger than this value. */ -#define SSL3_RT_DEFAULT_PLAIN_LENGTH 2048 /* Maximum plaintext length: defined by SSL/TLS standards */ #define SSL3_RT_MAX_PLAIN_LENGTH 16384 /* Maximum compression overhead: defined by SSL/TLS standards */ @@ -314,13 +311,6 @@ extern "C" { #define SSL3_RT_MAX_PACKET_SIZE \ (SSL3_RT_MAX_ENCRYPTED_LENGTH+SSL3_RT_HEADER_LENGTH) -/* Extra space for empty fragment, headers, MAC, and padding. */ -#define SSL3_RT_DEFAULT_WRITE_OVERHEAD 256 -#define SSL3_RT_DEFAULT_PACKET_SIZE 4096 - SSL3_RT_DEFAULT_WRITE_OVERHEAD -#if SSL3_RT_DEFAULT_PLAIN_LENGTH + SSL3_RT_DEFAULT_WRITE_OVERHEAD > SSL3_RT_DEFAULT_PACKET_SIZE -#error "Insufficient space allocated for write buffers." -#endif - #define SSL3_MD_CLIENT_FINISHED_CONST "\x43\x4C\x4E\x54" #define SSL3_MD_SERVER_FINISHED_CONST "\x53\x52\x56\x52" @@ -332,6 +322,7 @@ extern "C" { #define SSL3_RT_ALERT 21 #define SSL3_RT_HANDSHAKE 22 #define SSL3_RT_APPLICATION_DATA 23 +#define TLS1_RT_HEARTBEAT 24 #define SSL3_AL_WARNING 1 #define SSL3_AL_FATAL 2 @@ -349,6 +340,11 @@ extern "C" { #define SSL3_AD_CERTIFICATE_UNKNOWN 46 #define SSL3_AD_ILLEGAL_PARAMETER 47 /* fatal */ +#define TLS1_HB_REQUEST 1 +#define TLS1_HB_RESPONSE 2 + +#ifndef OPENSSL_NO_SSL_INTERN + typedef struct ssl3_record_st { /*r */ int type; /* type of record */ @@ -370,6 +366,8 @@ typedef struct ssl3_buffer_st int left; /* how many bytes left */ } SSL3_BUFFER; +#endif + #define SSL3_CT_RSA_SIGN 1 #define SSL3_CT_DSS_SIGN 2 #define SSL3_CT_RSA_FIXED_DH 3 @@ -389,6 +387,24 @@ typedef struct ssl3_buffer_st #define SSL3_FLAGS_POP_BUFFER 0x0004 #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008 #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010 +#define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020 +/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at + * this point in the handshake. If this flag is not set then received CCS + * records will cause a fatal error for the connection. */ +#define SSL3_FLAGS_CCS_OK 0x0080 + +/* SSL3_FLAGS_SGC_RESTART_DONE is set when we + * restart a handshake because of MS SGC and so prevents us + * from restarting the handshake in a loop. It's reset on a + * renegotiation, so effectively limits the client to one restart + * per negotiation. This limits the possibility of a DDoS + * attack where the client handshakes in a loop using SGC to + * restart. Servers which permit renegotiation can still be + * effected, but we can't prevent that. + */ +#define SSL3_FLAGS_SGC_RESTART_DONE 0x0040 + +#ifndef OPENSSL_NO_SSL_INTERN typedef struct ssl3_state_st { @@ -406,8 +422,8 @@ typedef struct ssl3_state_st unsigned char client_random[SSL3_RANDOM_SIZE]; /* flags for countermeasure against known-IV weakness */ - int need_empty_fragments; - int empty_fragment_done; + int need_record_splitting; + int record_split_done; /* The value of 'extra' when the buffers were initialized */ int init_extra; @@ -465,12 +481,6 @@ typedef struct ssl3_state_st void *server_opaque_prf_input; size_t server_opaque_prf_input_len; -#ifndef OPENSSL_NO_NEXTPROTONEG - /* Set if we saw the Next Protocol Negotiation extension from - our peer. */ - int next_proto_neg_seen; -#endif - struct { /* actually only needs to be 16+20 */ unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2]; @@ -480,7 +490,7 @@ typedef struct ssl3_state_st int finish_md_len; unsigned char peer_finish_md[EVP_MAX_MD_SIZE*2]; int peer_finish_md_len; - + unsigned long message_size; int message_type; @@ -528,14 +538,64 @@ typedef struct ssl3_state_st unsigned char previous_server_finished[EVP_MAX_MD_SIZE]; unsigned char previous_server_finished_len; int send_connection_binding; /* TODOEKR */ + +#ifndef OPENSSL_NO_NEXTPROTONEG + /* Set if we saw the Next Protocol Negotiation extension from our peer. */ + int next_proto_neg_seen; +#endif + +#ifndef OPENSSL_NO_TLSEXT +#ifndef OPENSSL_NO_EC + /* This is set to true if we believe that this is a version of Safari + * running on OS X 10.6 or newer. We wish to know this because Safari + * on 10.8 .. 10.8.3 has broken ECDHE-ECDSA support. */ + char is_probably_safari; +#endif /* !OPENSSL_NO_EC */ +#endif /* !OPENSSL_NO_TLSEXT */ + + /* In a client, this means that the server supported Channel ID and that + * a Channel ID was sent. In a server it means that we echoed support + * for Channel IDs and that tlsext_channel_id will be valid after the + * handshake. */ + char tlsext_channel_id_valid; + /* For a server: + * If |tlsext_channel_id_valid| is true, then this contains the + * verified Channel ID from the client: a P256 point, (x,y), where + * each are big-endian values. */ + unsigned char tlsext_channel_id[64]; + + /* ALPN information + * (we are in the process of transitioning from NPN to ALPN.) */ + + /* In a server these point to the selected ALPN protocol after the + * ClientHello has been processed. In a client these contain the + * protocol that the server selected once the ServerHello has been + * processed. */ + unsigned char *alpn_selected; + unsigned alpn_selected_len; + + /* These point to the digest function to use for signatures made with + * each type of public key. A NULL value indicates that the default + * digest should be used, which is SHA1 as of TLS 1.2. + * + * (These should be in the tmp member, but we have to put them here to + * ensure binary compatibility with earlier OpenSSL 1.0.* releases.) */ + const EVP_MD *digest_rsa; + const EVP_MD *digest_dsa; + const EVP_MD *digest_ecdsa; } SSL3_STATE; +#endif /* SSLv3 */ /*client */ /* extra state */ #define SSL3_ST_CW_FLUSH (0x100|SSL_ST_CONNECT) #define SSL3_ST_CUTTHROUGH_COMPLETE (0x101|SSL_ST_CONNECT) +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_ST_CW_WRITE_SOCK (0x310|SSL_ST_CONNECT) +#define DTLS1_SCTP_ST_CR_READ_SOCK (0x320|SSL_ST_CONNECT) +#endif /* write to server */ #define SSL3_ST_CW_CLNT_HELLO_A (0x110|SSL_ST_CONNECT) #define SSL3_ST_CW_CLNT_HELLO_B (0x111|SSL_ST_CONNECT) @@ -567,6 +627,8 @@ typedef struct ssl3_state_st #define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT) #define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT) #endif +#define SSL3_ST_CW_CHANNEL_ID_A (0x210|SSL_ST_CONNECT) +#define SSL3_ST_CW_CHANNEL_ID_B (0x211|SSL_ST_CONNECT) #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT) #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT) /* read from server */ @@ -582,6 +644,10 @@ typedef struct ssl3_state_st /* server */ /* extra state */ #define SSL3_ST_SW_FLUSH (0x100|SSL_ST_ACCEPT) +#ifndef OPENSSL_NO_SCTP +#define DTLS1_SCTP_ST_SW_WRITE_SOCK (0x310|SSL_ST_ACCEPT) +#define DTLS1_SCTP_ST_SR_READ_SOCK (0x320|SSL_ST_ACCEPT) +#endif /* read from client */ /* Do not change the number values, they do matter */ #define SSL3_ST_SR_CLNT_HELLO_A (0x110|SSL_ST_ACCEPT) @@ -612,10 +678,13 @@ typedef struct ssl3_state_st #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT) #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT) +#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT) #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT) #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT) #endif +#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT) +#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT) #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT) /* write to client */ @@ -643,6 +712,7 @@ typedef struct ssl3_state_st #ifndef OPENSSL_NO_NEXTPROTONEG #define SSL3_MT_NEXT_PROTO 67 #endif +#define SSL3_MT_ENCRYPTED_EXTENSIONS 203 #define DTLS1_MT_HELLO_VERIFY_REQUEST 3 diff --git a/app/openssl/ssl/ssl_algs.c b/app/openssl/ssl/ssl_algs.c index 0967b2df..9c34d197 100644 --- a/app/openssl/ssl/ssl_algs.c +++ b/app/openssl/ssl/ssl_algs.c @@ -73,6 +73,9 @@ int SSL_library_init(void) #endif #ifndef OPENSSL_NO_RC4 EVP_add_cipher(EVP_rc4()); +#if !defined(OPENSSL_NO_MD5) && (defined(__x86_64) || defined(__x86_64__)) + EVP_add_cipher(EVP_rc4_hmac_md5()); +#endif #endif #ifndef OPENSSL_NO_RC2 EVP_add_cipher(EVP_rc2_cbc()); @@ -85,6 +88,13 @@ int SSL_library_init(void) EVP_add_cipher(EVP_aes_128_cbc()); EVP_add_cipher(EVP_aes_192_cbc()); EVP_add_cipher(EVP_aes_256_cbc()); + EVP_add_cipher(EVP_aes_128_gcm()); + EVP_add_cipher(EVP_aes_256_gcm()); +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) + EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1()); + EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1()); +#endif + #endif #ifndef OPENSSL_NO_CAMELLIA EVP_add_cipher(EVP_camellia_128_cbc()); diff --git a/app/openssl/ssl/ssl_asn1.c b/app/openssl/ssl/ssl_asn1.c index d7f4c608..38540be1 100644 --- a/app/openssl/ssl/ssl_asn1.c +++ b/app/openssl/ssl/ssl_asn1.c @@ -114,6 +114,9 @@ typedef struct ssl_session_asn1_st ASN1_OCTET_STRING psk_identity_hint; ASN1_OCTET_STRING psk_identity; #endif /* OPENSSL_NO_PSK */ +#ifndef OPENSSL_NO_SRP + ASN1_OCTET_STRING srp_username; +#endif /* OPENSSL_NO_SRP */ } SSL_SESSION_ASN1; int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) @@ -129,6 +132,9 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) #ifndef OPENSSL_NO_COMP unsigned char cbuf; int v11=0; +#endif +#ifndef OPENSSL_NO_SRP + int v12=0; #endif long l; SSL_SESSION_ASN1 a; @@ -267,6 +273,14 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) a.psk_identity.data=(unsigned char *)(in->psk_identity); } #endif /* OPENSSL_NO_PSK */ +#ifndef OPENSSL_NO_SRP + if (in->srp_username) + { + a.srp_username.length=strlen(in->srp_username); + a.srp_username.type=V_ASN1_OCTET_STRING; + a.srp_username.data=(unsigned char *)(in->srp_username); + } +#endif /* OPENSSL_NO_SRP */ M_ASN1_I2D_len(&(a.version), i2d_ASN1_INTEGER); M_ASN1_I2D_len(&(a.ssl_version), i2d_ASN1_INTEGER); @@ -307,6 +321,10 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->psk_identity) M_ASN1_I2D_len_EXP_opt(&(a.psk_identity), i2d_ASN1_OCTET_STRING,8,v8); #endif /* OPENSSL_NO_PSK */ +#ifndef OPENSSL_NO_SRP + if (in->srp_username) + M_ASN1_I2D_len_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); +#endif /* OPENSSL_NO_SRP */ M_ASN1_I2D_seq_total(); @@ -351,6 +369,10 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp) if (in->compress_meth) M_ASN1_I2D_put_EXP_opt(&(a.comp_id), i2d_ASN1_OCTET_STRING,11,v11); #endif +#ifndef OPENSSL_NO_SRP + if (in->srp_username) + M_ASN1_I2D_put_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12); +#endif /* OPENSSL_NO_SRP */ M_ASN1_I2D_finish(); } @@ -549,6 +571,19 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, } else ret->psk_identity_hint=NULL; + + os.length=0; + os.data=NULL; + M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,8); + if (os.data) + { + ret->psk_identity = BUF_strndup((char *)os.data, os.length); + OPENSSL_free(os.data); + os.data = NULL; + os.length = 0; + } + else + ret->psk_identity=NULL; #endif /* OPENSSL_NO_PSK */ #ifndef OPENSSL_NO_TLSEXT @@ -588,5 +623,20 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, } #endif +#ifndef OPENSSL_NO_SRP + os.length=0; + os.data=NULL; + M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,12); + if (os.data) + { + ret->srp_username = BUF_strndup((char *)os.data, os.length); + OPENSSL_free(os.data); + os.data = NULL; + os.length = 0; + } + else + ret->srp_username=NULL; +#endif /* OPENSSL_NO_SRP */ + M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION); } diff --git a/app/openssl/ssl/ssl_cert.c b/app/openssl/ssl/ssl_cert.c index 27256eea..bc4150b0 100644 --- a/app/openssl/ssl/ssl_cert.c +++ b/app/openssl/ssl/ssl_cert.c @@ -174,7 +174,6 @@ CERT *ssl_cert_new(void) ret->key= &(ret->pkeys[SSL_PKEY_RSA_ENC]); ret->references=1; - return(ret); } diff --git a/app/openssl/ssl/ssl_ciph.c b/app/openssl/ssl/ssl_ciph.c index e523a8f2..e8794d4b 100644 --- a/app/openssl/ssl/ssl_ciph.c +++ b/app/openssl/ssl/ssl_ciph.c @@ -162,11 +162,13 @@ #define SSL_ENC_CAMELLIA256_IDX 9 #define SSL_ENC_GOST89_IDX 10 #define SSL_ENC_SEED_IDX 11 -#define SSL_ENC_NUM_IDX 12 +#define SSL_ENC_AES128GCM_IDX 12 +#define SSL_ENC_AES256GCM_IDX 13 +#define SSL_ENC_NUM_IDX 14 static const EVP_CIPHER *ssl_cipher_methods[SSL_ENC_NUM_IDX]={ - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL }; #define SSL_COMP_NULL_IDX 0 @@ -179,28 +181,32 @@ static STACK_OF(SSL_COMP) *ssl_comp_methods=NULL; #define SSL_MD_SHA1_IDX 1 #define SSL_MD_GOST94_IDX 2 #define SSL_MD_GOST89MAC_IDX 3 +#define SSL_MD_SHA256_IDX 4 +#define SSL_MD_SHA384_IDX 5 /*Constant SSL_MAX_DIGEST equal to size of digests array should be * defined in the * ssl_locl.h */ #define SSL_MD_NUM_IDX SSL_MAX_DIGEST static const EVP_MD *ssl_digest_methods[SSL_MD_NUM_IDX]={ - NULL,NULL,NULL,NULL + NULL,NULL,NULL,NULL,NULL,NULL }; /* PKEY_TYPE for GOST89MAC is known in advance, but, because * implementation is engine-provided, we'll fill it only if * corresponding EVP_PKEY_METHOD is found */ static int ssl_mac_pkey_id[SSL_MD_NUM_IDX]={ - EVP_PKEY_HMAC,EVP_PKEY_HMAC,EVP_PKEY_HMAC,NID_undef + EVP_PKEY_HMAC,EVP_PKEY_HMAC,EVP_PKEY_HMAC,NID_undef, + EVP_PKEY_HMAC,EVP_PKEY_HMAC }; static int ssl_mac_secret_size[SSL_MD_NUM_IDX]={ - 0,0,0,0 + 0,0,0,0,0,0 }; static int ssl_handshake_digest_flag[SSL_MD_NUM_IDX]={ SSL_HANDSHAKE_MAC_MD5,SSL_HANDSHAKE_MAC_SHA, - SSL_HANDSHAKE_MAC_GOST94,0 + SSL_HANDSHAKE_MAC_GOST94, 0, SSL_HANDSHAKE_MAC_SHA256, + SSL_HANDSHAKE_MAC_SHA384 }; #define CIPHER_ADD 1 @@ -247,6 +253,7 @@ static const SSL_CIPHER cipher_aliases[]={ {0,SSL_TXT_ECDH,0, SSL_kECDHr|SSL_kECDHe|SSL_kEECDH,0,0,0,0,0,0,0,0}, {0,SSL_TXT_kPSK,0, SSL_kPSK, 0,0,0,0,0,0,0,0}, + {0,SSL_TXT_kSRP,0, SSL_kSRP, 0,0,0,0,0,0,0,0}, {0,SSL_TXT_kGOST,0, SSL_kGOST,0,0,0,0,0,0,0,0}, /* server authentication aliases */ @@ -273,6 +280,7 @@ static const SSL_CIPHER cipher_aliases[]={ {0,SSL_TXT_ADH,0, SSL_kEDH,SSL_aNULL,0,0,0,0,0,0,0}, {0,SSL_TXT_AECDH,0, SSL_kEECDH,SSL_aNULL,0,0,0,0,0,0,0}, {0,SSL_TXT_PSK,0, SSL_kPSK,SSL_aPSK,0,0,0,0,0,0,0}, + {0,SSL_TXT_SRP,0, SSL_kSRP,0,0,0,0,0,0,0,0}, /* symmetric encryption aliases */ @@ -283,9 +291,10 @@ static const SSL_CIPHER cipher_aliases[]={ {0,SSL_TXT_IDEA,0, 0,0,SSL_IDEA, 0,0,0,0,0,0}, {0,SSL_TXT_SEED,0, 0,0,SSL_SEED, 0,0,0,0,0,0}, {0,SSL_TXT_eNULL,0, 0,0,SSL_eNULL, 0,0,0,0,0,0}, - {0,SSL_TXT_AES128,0, 0,0,SSL_AES128,0,0,0,0,0,0}, - {0,SSL_TXT_AES256,0, 0,0,SSL_AES256,0,0,0,0,0,0}, - {0,SSL_TXT_AES,0, 0,0,SSL_AES128|SSL_AES256,0,0,0,0,0,0}, + {0,SSL_TXT_AES128,0, 0,0,SSL_AES128|SSL_AES128GCM,0,0,0,0,0,0}, + {0,SSL_TXT_AES256,0, 0,0,SSL_AES256|SSL_AES256GCM,0,0,0,0,0,0}, + {0,SSL_TXT_AES,0, 0,0,SSL_AES,0,0,0,0,0,0}, + {0,SSL_TXT_AES_GCM,0, 0,0,SSL_AES128GCM|SSL_AES256GCM,0,0,0,0,0,0}, {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0}, @@ -296,11 +305,14 @@ static const SSL_CIPHER cipher_aliases[]={ {0,SSL_TXT_SHA,0, 0,0,0,SSL_SHA1, 0,0,0,0,0}, {0,SSL_TXT_GOST94,0, 0,0,0,SSL_GOST94, 0,0,0,0,0}, {0,SSL_TXT_GOST89MAC,0, 0,0,0,SSL_GOST89MAC, 0,0,0,0,0}, + {0,SSL_TXT_SHA256,0, 0,0,0,SSL_SHA256, 0,0,0,0,0}, + {0,SSL_TXT_SHA384,0, 0,0,0,SSL_SHA384, 0,0,0,0,0}, /* protocol version aliases */ {0,SSL_TXT_SSLV2,0, 0,0,0,0,SSL_SSLV2, 0,0,0,0}, {0,SSL_TXT_SSLV3,0, 0,0,0,0,SSL_SSLV3, 0,0,0,0}, {0,SSL_TXT_TLSV1,0, 0,0,0,0,SSL_TLSV1, 0,0,0,0}, + {0,SSL_TXT_TLSV1_2,0, 0,0,0,0,SSL_TLSV1_2, 0,0,0,0}, /* export flag */ {0,SSL_TXT_EXP,0, 0,0,0,0,0,SSL_EXPORT,0,0,0}, @@ -379,6 +391,11 @@ void ssl_load_ciphers(void) ssl_cipher_methods[SSL_ENC_SEED_IDX]= EVP_get_cipherbyname(SN_seed_cbc); + ssl_cipher_methods[SSL_ENC_AES128GCM_IDX]= + EVP_get_cipherbyname(SN_aes_128_gcm); + ssl_cipher_methods[SSL_ENC_AES256GCM_IDX]= + EVP_get_cipherbyname(SN_aes_256_gcm); + ssl_digest_methods[SSL_MD_MD5_IDX]= EVP_get_digestbyname(SN_md5); ssl_mac_secret_size[SSL_MD_MD5_IDX]= @@ -404,6 +421,14 @@ void ssl_load_ciphers(void) ssl_mac_secret_size[SSL_MD_GOST89MAC_IDX]=32; } + ssl_digest_methods[SSL_MD_SHA256_IDX]= + EVP_get_digestbyname(SN_sha256); + ssl_mac_secret_size[SSL_MD_SHA256_IDX]= + EVP_MD_size(ssl_digest_methods[SSL_MD_SHA256_IDX]); + ssl_digest_methods[SSL_MD_SHA384_IDX]= + EVP_get_digestbyname(SN_sha384); + ssl_mac_secret_size[SSL_MD_SHA384_IDX]= + EVP_MD_size(ssl_digest_methods[SSL_MD_SHA384_IDX]); } #ifndef OPENSSL_NO_COMP @@ -446,6 +471,7 @@ static void load_builtin_compressions(void) sk_SSL_COMP_push(ssl_comp_methods,comp); } } + sk_SSL_COMP_sort(ssl_comp_methods); } MemCheck_on(); } @@ -525,6 +551,12 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, case SSL_SEED: i=SSL_ENC_SEED_IDX; break; + case SSL_AES128GCM: + i=SSL_ENC_AES128GCM_IDX; + break; + case SSL_AES256GCM: + i=SSL_ENC_AES256GCM_IDX; + break; default: i= -1; break; @@ -548,6 +580,12 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, case SSL_SHA1: i=SSL_MD_SHA1_IDX; break; + case SSL_SHA256: + i=SSL_MD_SHA256_IDX; + break; + case SSL_SHA384: + i=SSL_MD_SHA384_IDX; + break; case SSL_GOST94: i = SSL_MD_GOST94_IDX; break; @@ -563,17 +601,45 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, *md=NULL; if (mac_pkey_type!=NULL) *mac_pkey_type = NID_undef; if (mac_secret_size!=NULL) *mac_secret_size = 0; - + if (c->algorithm_mac == SSL_AEAD) + mac_pkey_type = NULL; } else { *md=ssl_digest_methods[i]; if (mac_pkey_type!=NULL) *mac_pkey_type = ssl_mac_pkey_id[i]; if (mac_secret_size!=NULL) *mac_secret_size = ssl_mac_secret_size[i]; - } + } + + if ((*enc != NULL) && + (*md != NULL || (EVP_CIPHER_flags(*enc)&EVP_CIPH_FLAG_AEAD_CIPHER)) && + (!mac_pkey_type||*mac_pkey_type != NID_undef)) + { + const EVP_CIPHER *evp; + + if (s->ssl_version>>8 != TLS1_VERSION_MAJOR || + s->ssl_version < TLS1_VERSION) + return 1; - if ((*enc != NULL) && (*md != NULL) && (!mac_pkey_type||*mac_pkey_type != NID_undef)) +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return 1; +#endif + + if (c->algorithm_enc == SSL_RC4 && + c->algorithm_mac == SSL_MD5 && + (evp=EVP_get_cipherbyname("RC4-HMAC-MD5"))) + *enc = evp, *md = NULL; + else if (c->algorithm_enc == SSL_AES128 && + c->algorithm_mac == SSL_SHA1 && + (evp=EVP_get_cipherbyname("AES-128-CBC-HMAC-SHA1"))) + *enc = evp, *md = NULL; + else if (c->algorithm_enc == SSL_AES256 && + c->algorithm_mac == SSL_SHA1 && + (evp=EVP_get_cipherbyname("AES-256-CBC-HMAC-SHA1"))) + *enc = evp, *md = NULL; return(1); + } else return(0); } @@ -584,9 +650,11 @@ int ssl_get_handshake_digest(int idx, long *mask, const EVP_MD **md) { return 0; } - if (ssl_handshake_digest_flag[idx]==0) return 0; *mask = ssl_handshake_digest_flag[idx]; - *md = ssl_digest_methods[idx]; + if (*mask) + *md = ssl_digest_methods[idx]; + else + *md = NULL; return 1; } @@ -660,6 +728,9 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, un #ifdef OPENSSL_NO_PSK *mkey |= SSL_kPSK; *auth |= SSL_aPSK; +#endif +#ifdef OPENSSL_NO_SRP + *mkey |= SSL_kSRP; #endif /* Check for presence of GOST 34.10 algorithms, and if they * do not present, disable appropriate auth and key exchange */ @@ -686,6 +757,8 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, un *enc |= (ssl_cipher_methods[SSL_ENC_IDEA_IDX] == NULL) ? SSL_IDEA:0; *enc |= (ssl_cipher_methods[SSL_ENC_AES128_IDX] == NULL) ? SSL_AES128:0; *enc |= (ssl_cipher_methods[SSL_ENC_AES256_IDX] == NULL) ? SSL_AES256:0; + *enc |= (ssl_cipher_methods[SSL_ENC_AES128GCM_IDX] == NULL) ? SSL_AES128GCM:0; + *enc |= (ssl_cipher_methods[SSL_ENC_AES256GCM_IDX] == NULL) ? SSL_AES256GCM:0; *enc |= (ssl_cipher_methods[SSL_ENC_CAMELLIA128_IDX] == NULL) ? SSL_CAMELLIA128:0; *enc |= (ssl_cipher_methods[SSL_ENC_CAMELLIA256_IDX] == NULL) ? SSL_CAMELLIA256:0; *enc |= (ssl_cipher_methods[SSL_ENC_GOST89_IDX] == NULL) ? SSL_eGOST2814789CNT:0; @@ -693,6 +766,8 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, un *mac |= (ssl_digest_methods[SSL_MD_MD5_IDX ] == NULL) ? SSL_MD5 :0; *mac |= (ssl_digest_methods[SSL_MD_SHA1_IDX] == NULL) ? SSL_SHA1:0; + *mac |= (ssl_digest_methods[SSL_MD_SHA256_IDX] == NULL) ? SSL_SHA256:0; + *mac |= (ssl_digest_methods[SSL_MD_SHA384_IDX] == NULL) ? SSL_SHA384:0; *mac |= (ssl_digest_methods[SSL_MD_GOST94_IDX] == NULL) ? SSL_GOST94:0; *mac |= (ssl_digest_methods[SSL_MD_GOST89MAC_IDX] == NULL || ssl_mac_pkey_id[SSL_MD_GOST89MAC_IDX]==NID_undef)? SSL_GOST89MAC:0; @@ -723,6 +798,9 @@ static void ssl_cipher_collect_ciphers(const SSL_METHOD *ssl_method, c = ssl_method->get_cipher(i); /* drop those that use any of that is not available */ if ((c != NULL) && c->valid && +#ifdef OPENSSL_FIPS + (!FIPS_mode() || (c->algo_strength & SSL_FIPS)) && +#endif !(c->algorithm_mkey & disabled_mkey) && !(c->algorithm_auth & disabled_auth) && !(c->algorithm_enc & disabled_enc) && @@ -1073,9 +1151,9 @@ static int ssl_cipher_process_rulestr(const char *rule_str, while ( ((ch >= 'A') && (ch <= 'Z')) || ((ch >= '0') && (ch <= '9')) || ((ch >= 'a') && (ch <= 'z')) || - (ch == '-')) + (ch == '-') || (ch == '.')) #else - while ( isalnum(ch) || (ch == '-')) + while ( isalnum(ch) || (ch == '-') || (ch == '.')) #endif { ch = *(++l); @@ -1422,7 +1500,11 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, */ for (curr = head; curr != NULL; curr = curr->next) { +#ifdef OPENSSL_FIPS + if (curr->active && (!FIPS_mode() || curr->cipher->algo_strength & SSL_FIPS)) +#else if (curr->active) +#endif { sk_SSL_CIPHER_push(cipherstack, curr->cipher); #ifdef CIPHER_DEBUG @@ -1479,6 +1561,8 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) ver="SSLv2"; else if (alg_ssl & SSL_SSLV3) ver="SSLv3"; + else if (alg_ssl & SSL_TLSV1_2) + ver="TLSv1.2"; else ver="unknown"; @@ -1511,6 +1595,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_kPSK: kx="PSK"; break; + case SSL_kSRP: + kx="SRP"; + break; default: kx="unknown"; } @@ -1573,6 +1660,12 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_AES256: enc="AES(256)"; break; + case SSL_AES128GCM: + enc="AESGCM(128)"; + break; + case SSL_AES256GCM: + enc="AESGCM(256)"; + break; case SSL_CAMELLIA128: enc="Camellia(128)"; break; @@ -1595,6 +1688,15 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_SHA1: mac="SHA1"; break; + case SSL_SHA256: + mac="SHA256"; + break; + case SSL_SHA384: + mac="SHA384"; + break; + case SSL_AEAD: + mac="AEAD"; + break; default: mac="unknown"; break; @@ -1652,6 +1754,11 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER *c, int *alg_bits) return(ret); } +unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c) + { + return c->id; + } + /* return string version of key exchange algorithm */ const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher) { diff --git a/app/openssl/ssl/ssl_err.c b/app/openssl/ssl/ssl_err.c index 8bff5905..bddd7949 100644 --- a/app/openssl/ssl/ssl_err.c +++ b/app/openssl/ssl/ssl_err.c @@ -1,6 +1,6 @@ /* ssl/ssl_err.c */ /* ==================================================================== - * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -80,6 +80,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_DTLS1_ACCEPT), "DTLS1_ACCEPT"}, {ERR_FUNC(SSL_F_DTLS1_ADD_CERT_TO_BUF), "DTLS1_ADD_CERT_TO_BUF"}, {ERR_FUNC(SSL_F_DTLS1_BUFFER_RECORD), "DTLS1_BUFFER_RECORD"}, +{ERR_FUNC(SSL_F_DTLS1_CHECK_TIMEOUT_NUM), "DTLS1_CHECK_TIMEOUT_NUM"}, {ERR_FUNC(SSL_F_DTLS1_CLIENT_HELLO), "DTLS1_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_DTLS1_CONNECT), "DTLS1_CONNECT"}, {ERR_FUNC(SSL_F_DTLS1_ENC), "DTLS1_ENC"}, @@ -88,6 +89,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT), "DTLS1_GET_MESSAGE_FRAGMENT"}, {ERR_FUNC(SSL_F_DTLS1_GET_RECORD), "DTLS1_GET_RECORD"}, {ERR_FUNC(SSL_F_DTLS1_HANDLE_TIMEOUT), "DTLS1_HANDLE_TIMEOUT"}, +{ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "DTLS1_HEARTBEAT"}, {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "DTLS1_OUTPUT_CERT_CHAIN"}, {ERR_FUNC(SSL_F_DTLS1_PREPROCESS_FRAGMENT), "DTLS1_PREPROCESS_FRAGMENT"}, {ERR_FUNC(SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE), "DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE"}, @@ -137,6 +139,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL3_CALLBACK_CTRL), "SSL3_CALLBACK_CTRL"}, {ERR_FUNC(SSL_F_SSL3_CHANGE_CIPHER_STATE), "SSL3_CHANGE_CIPHER_STATE"}, {ERR_FUNC(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM), "SSL3_CHECK_CERT_AND_ALGORITHM"}, +{ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "SSL3_CHECK_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "SSL3_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_SSL3_CONNECT), "SSL3_CONNECT"}, {ERR_FUNC(SSL_F_SSL3_CTRL), "SSL3_CTRL"}, @@ -148,6 +151,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL3_GET_CERTIFICATE_REQUEST), "SSL3_GET_CERTIFICATE_REQUEST"}, {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "SSL3_GET_CERT_STATUS"}, {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "SSL3_GET_CERT_VERIFY"}, +{ERR_FUNC(SSL_F_SSL3_GET_CHANNEL_ID), "SSL3_GET_CHANNEL_ID"}, {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_CERTIFICATE), "SSL3_GET_CLIENT_CERTIFICATE"}, {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "SSL3_GET_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE), "SSL3_GET_CLIENT_KEY_EXCHANGE"}, @@ -167,6 +171,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "SSL3_READ_BYTES"}, {ERR_FUNC(SSL_F_SSL3_READ_N), "SSL3_READ_N"}, {ERR_FUNC(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST), "SSL3_SEND_CERTIFICATE_REQUEST"}, +{ERR_FUNC(SSL_F_SSL3_SEND_CHANNEL_ID), "SSL3_SEND_CHANNEL_ID"}, {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE), "SSL3_SEND_CLIENT_CERTIFICATE"}, {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE), "SSL3_SEND_CLIENT_KEY_EXCHANGE"}, {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "SSL3_SEND_CLIENT_VERIFY"}, @@ -180,10 +185,12 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "SSL3_WRITE_PENDING"}, {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT), "SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT"}, {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT), "SSL_ADD_CLIENTHELLO_TLSEXT"}, +{ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT), "SSL_ADD_CLIENTHELLO_USE_SRTP_EXT"}, {ERR_FUNC(SSL_F_SSL_ADD_DIR_CERT_SUBJECTS_TO_STACK), "SSL_add_dir_cert_subjects_to_stack"}, {ERR_FUNC(SSL_F_SSL_ADD_FILE_CERT_SUBJECTS_TO_STACK), "SSL_add_file_cert_subjects_to_stack"}, {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT), "SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT"}, {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT), "SSL_ADD_SERVERHELLO_TLSEXT"}, +{ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT), "SSL_ADD_SERVERHELLO_USE_SRTP_EXT"}, {ERR_FUNC(SSL_F_SSL_BAD_METHOD), "SSL_BAD_METHOD"}, {ERR_FUNC(SSL_F_SSL_BYTES_TO_CIPHER_LIST), "SSL_BYTES_TO_CIPHER_LIST"}, {ERR_FUNC(SSL_F_SSL_CERT_DUP), "SSL_CERT_DUP"}, @@ -200,6 +207,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL_CREATE_CIPHER_LIST), "SSL_CREATE_CIPHER_LIST"}, {ERR_FUNC(SSL_F_SSL_CTRL), "SSL_ctrl"}, {ERR_FUNC(SSL_F_SSL_CTX_CHECK_PRIVATE_KEY), "SSL_CTX_check_private_key"}, +{ERR_FUNC(SSL_F_SSL_CTX_MAKE_PROFILES), "SSL_CTX_MAKE_PROFILES"}, {ERR_FUNC(SSL_F_SSL_CTX_NEW), "SSL_CTX_new"}, {ERR_FUNC(SSL_F_SSL_CTX_SET_CIPHER_LIST), "SSL_CTX_set_cipher_list"}, {ERR_FUNC(SSL_F_SSL_CTX_SET_CLIENT_CERT_ENGINE), "SSL_CTX_set_client_cert_engine"}, @@ -222,14 +230,17 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL_GET_NEW_SESSION), "SSL_GET_NEW_SESSION"}, {ERR_FUNC(SSL_F_SSL_GET_PREV_SESSION), "SSL_GET_PREV_SESSION"}, {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_CERT), "SSL_GET_SERVER_SEND_CERT"}, +{ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_PKEY), "SSL_GET_SERVER_SEND_PKEY"}, {ERR_FUNC(SSL_F_SSL_GET_SIGN_PKEY), "SSL_GET_SIGN_PKEY"}, {ERR_FUNC(SSL_F_SSL_INIT_WBIO_BUFFER), "SSL_INIT_WBIO_BUFFER"}, {ERR_FUNC(SSL_F_SSL_LOAD_CLIENT_CA_FILE), "SSL_load_client_CA_file"}, {ERR_FUNC(SSL_F_SSL_NEW), "SSL_new"}, {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT), "SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT"}, {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT), "SSL_PARSE_CLIENTHELLO_TLSEXT"}, +{ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT), "SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT"}, {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT), "SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT"}, {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT), "SSL_PARSE_SERVERHELLO_TLSEXT"}, +{ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT), "SSL_PARSE_SERVERHELLO_USE_SRTP_EXT"}, {ERR_FUNC(SSL_F_SSL_PEEK), "SSL_peek"}, {ERR_FUNC(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT), "SSL_PREPARE_CLIENTHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT), "SSL_PREPARE_SERVERHELLO_TLSEXT"}, @@ -238,6 +249,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL_RSA_PUBLIC_ENCRYPT), "SSL_RSA_PUBLIC_ENCRYPT"}, {ERR_FUNC(SSL_F_SSL_SESSION_NEW), "SSL_SESSION_new"}, {ERR_FUNC(SSL_F_SSL_SESSION_PRINT_FP), "SSL_SESSION_print_fp"}, +{ERR_FUNC(SSL_F_SSL_SESSION_SET1_ID_CONTEXT), "SSL_SESSION_set1_id_context"}, {ERR_FUNC(SSL_F_SSL_SESS_CERT_NEW), "SSL_SESS_CERT_NEW"}, {ERR_FUNC(SSL_F_SSL_SET_CERT), "SSL_SET_CERT"}, {ERR_FUNC(SSL_F_SSL_SET_CIPHER_LIST), "SSL_set_cipher_list"}, @@ -251,6 +263,7 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_SSL_SET_TRUST), "SSL_set_trust"}, {ERR_FUNC(SSL_F_SSL_SET_WFD), "SSL_set_wfd"}, {ERR_FUNC(SSL_F_SSL_SHUTDOWN), "SSL_shutdown"}, +{ERR_FUNC(SSL_F_SSL_SRP_CTX_INIT), "SSL_SRP_CTX_init"}, {ERR_FUNC(SSL_F_SSL_UNDEFINED_CONST_FUNCTION), "SSL_UNDEFINED_CONST_FUNCTION"}, {ERR_FUNC(SSL_F_SSL_UNDEFINED_FUNCTION), "SSL_UNDEFINED_FUNCTION"}, {ERR_FUNC(SSL_F_SSL_UNDEFINED_VOID_FUNCTION), "SSL_UNDEFINED_VOID_FUNCTION"}, @@ -270,6 +283,8 @@ static ERR_STRING_DATA SSL_str_functs[]= {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"}, {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, +{ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL), "TLS1_EXPORT_KEYING_MATERIAL"}, +{ERR_FUNC(SSL_F_TLS1_HEARTBEAT), "SSL_F_TLS1_HEARTBEAT"}, {ERR_FUNC(SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT), "TLS1_PREPARE_CLIENTHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT), "TLS1_PREPARE_SERVERHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_TLS1_PRF), "tls1_prf"}, @@ -312,6 +327,13 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_BAD_RSA_MODULUS_LENGTH),"bad rsa modulus length"}, {ERR_REASON(SSL_R_BAD_RSA_SIGNATURE) ,"bad rsa signature"}, {ERR_REASON(SSL_R_BAD_SIGNATURE) ,"bad signature"}, +{ERR_REASON(SSL_R_BAD_SRP_A_LENGTH) ,"bad srp a length"}, +{ERR_REASON(SSL_R_BAD_SRP_B_LENGTH) ,"bad srp b length"}, +{ERR_REASON(SSL_R_BAD_SRP_G_LENGTH) ,"bad srp g length"}, +{ERR_REASON(SSL_R_BAD_SRP_N_LENGTH) ,"bad srp n length"}, +{ERR_REASON(SSL_R_BAD_SRP_S_LENGTH) ,"bad srp s length"}, +{ERR_REASON(SSL_R_BAD_SRTP_MKI_VALUE) ,"bad srtp mki value"}, +{ERR_REASON(SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST),"bad srtp protection profile list"}, {ERR_REASON(SSL_R_BAD_SSL_FILETYPE) ,"bad ssl filetype"}, {ERR_REASON(SSL_R_BAD_SSL_SESSION_ID_LENGTH),"bad ssl session id length"}, {ERR_REASON(SSL_R_BAD_STATE) ,"bad state"}, @@ -319,12 +341,15 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_BIO_NOT_SET) ,"bio not set"}, {ERR_REASON(SSL_R_BLOCK_CIPHER_PAD_IS_WRONG),"block cipher pad is wrong"}, {ERR_REASON(SSL_R_BN_LIB) ,"bn lib"}, +{ERR_REASON(SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY),"cannot serialize public key"}, {ERR_REASON(SSL_R_CA_DN_LENGTH_MISMATCH) ,"ca dn length mismatch"}, {ERR_REASON(SSL_R_CA_DN_TOO_LONG) ,"ca dn too long"}, {ERR_REASON(SSL_R_CCS_RECEIVED_EARLY) ,"ccs received early"}, {ERR_REASON(SSL_R_CERTIFICATE_VERIFY_FAILED),"certificate verify failed"}, {ERR_REASON(SSL_R_CERT_LENGTH_MISMATCH) ,"cert length mismatch"}, {ERR_REASON(SSL_R_CHALLENGE_IS_DIFFERENT),"challenge is different"}, +{ERR_REASON(SSL_R_CHANNEL_ID_NOT_P256) ,"channel id not p256"}, +{ERR_REASON(SSL_R_CHANNEL_ID_SIGNATURE_INVALID),"Channel ID signature invalid"}, {ERR_REASON(SSL_R_CIPHER_CODE_WRONG_LENGTH),"cipher code wrong length"}, {ERR_REASON(SSL_R_CIPHER_OR_HASH_UNAVAILABLE),"cipher or hash unavailable"}, {ERR_REASON(SSL_R_CIPHER_TABLE_SRC_ERROR),"cipher table src error"}, @@ -337,6 +362,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_CONNECTION_ID_IS_DIFFERENT),"connection id is different"}, {ERR_REASON(SSL_R_CONNECTION_TYPE_NOT_SET),"connection type not set"}, {ERR_REASON(SSL_R_COOKIE_MISMATCH) ,"cookie mismatch"}, +{ERR_REASON(SSL_R_D2I_ECDSA_SIG) ,"d2i ecdsa sig"}, {ERR_REASON(SSL_R_DATA_BETWEEN_CCS_AND_FINISHED),"data between ccs and finished"}, {ERR_REASON(SSL_R_DATA_LENGTH_TOO_LONG) ,"data length too long"}, {ERR_REASON(SSL_R_DECRYPTION_FAILED) ,"decryption failed"}, @@ -350,12 +376,16 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_ECC_CERT_SHOULD_HAVE_RSA_SIGNATURE),"ecc cert should have rsa signature"}, {ERR_REASON(SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE),"ecc cert should have sha1 signature"}, {ERR_REASON(SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER),"ecgroup too large for cipher"}, +{ERR_REASON(SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST),"empty srtp protection profile list"}, {ERR_REASON(SSL_R_ENCRYPTED_LENGTH_TOO_LONG),"encrypted length too long"}, {ERR_REASON(SSL_R_ERROR_GENERATING_TMP_RSA_KEY),"error generating tmp rsa key"}, {ERR_REASON(SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST),"error in received cipher list"}, +{ERR_REASON(SSL_R_EVP_DIGESTSIGNFINAL_FAILED),"evp digestsignfinal failed"}, +{ERR_REASON(SSL_R_EVP_DIGESTSIGNINIT_FAILED),"evp digestsigninit failed"}, {ERR_REASON(SSL_R_EXCESSIVE_MESSAGE_SIZE),"excessive message size"}, {ERR_REASON(SSL_R_EXTRA_DATA_IN_MESSAGE) ,"extra data in message"}, {ERR_REASON(SSL_R_GOT_A_FIN_BEFORE_A_CCS),"got a fin before a ccs"}, +{ERR_REASON(SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS),"got Channel ID before a ccs"}, {ERR_REASON(SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS),"got next proto before a ccs"}, {ERR_REASON(SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION),"got next proto without seeing extension"}, {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST) ,"https proxy request"}, @@ -365,7 +395,9 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_INVALID_CHALLENGE_LENGTH),"invalid challenge length"}, {ERR_REASON(SSL_R_INVALID_COMMAND) ,"invalid command"}, {ERR_REASON(SSL_R_INVALID_COMPRESSION_ALGORITHM),"invalid compression algorithm"}, +{ERR_REASON(SSL_R_INVALID_MESSAGE) ,"invalid message"}, {ERR_REASON(SSL_R_INVALID_PURPOSE) ,"invalid purpose"}, +{ERR_REASON(SSL_R_INVALID_SRP_USERNAME) ,"invalid srp username"}, {ERR_REASON(SSL_R_INVALID_STATUS_RESPONSE),"invalid status response"}, {ERR_REASON(SSL_R_INVALID_TICKET_KEYS_LENGTH),"invalid ticket keys length"}, {ERR_REASON(SSL_R_INVALID_TRUST) ,"invalid trust"}, @@ -395,11 +427,13 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_MISSING_RSA_CERTIFICATE),"missing rsa certificate"}, {ERR_REASON(SSL_R_MISSING_RSA_ENCRYPTING_CERT),"missing rsa encrypting cert"}, {ERR_REASON(SSL_R_MISSING_RSA_SIGNING_CERT),"missing rsa signing cert"}, +{ERR_REASON(SSL_R_MISSING_SRP_PARAM) ,"can't find SRP server param"}, {ERR_REASON(SSL_R_MISSING_TMP_DH_KEY) ,"missing tmp dh key"}, {ERR_REASON(SSL_R_MISSING_TMP_ECDH_KEY) ,"missing tmp ecdh key"}, {ERR_REASON(SSL_R_MISSING_TMP_RSA_KEY) ,"missing tmp rsa key"}, {ERR_REASON(SSL_R_MISSING_TMP_RSA_PKEY) ,"missing tmp rsa pkey"}, {ERR_REASON(SSL_R_MISSING_VERIFY_MESSAGE),"missing verify message"}, +{ERR_REASON(SSL_R_MULTIPLE_SGC_RESTARTS) ,"multiple sgc restarts"}, {ERR_REASON(SSL_R_NON_SSLV2_INITIAL_PACKET),"non sslv2 initial packet"}, {ERR_REASON(SSL_R_NO_CERTIFICATES_RETURNED),"no certificates returned"}, {ERR_REASON(SSL_R_NO_CERTIFICATE_ASSIGNED),"no certificate assigned"}, @@ -416,6 +450,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_NO_COMPRESSION_SPECIFIED),"no compression specified"}, {ERR_REASON(SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER),"Peer haven't sent GOST certificate, required for selected ciphersuite"}, {ERR_REASON(SSL_R_NO_METHOD_SPECIFIED) ,"no method specified"}, +{ERR_REASON(SSL_R_NO_P256_SUPPORT) ,"no p256 support"}, {ERR_REASON(SSL_R_NO_PRIVATEKEY) ,"no privatekey"}, {ERR_REASON(SSL_R_NO_PRIVATE_KEY_ASSIGNED),"no private key assigned"}, {ERR_REASON(SSL_R_NO_PROTOCOLS_AVAILABLE),"no protocols available"}, @@ -423,6 +458,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_NO_RENEGOTIATION) ,"no renegotiation"}, {ERR_REASON(SSL_R_NO_REQUIRED_DIGEST) ,"digest requred for handshake isn't computed"}, {ERR_REASON(SSL_R_NO_SHARED_CIPHER) ,"no shared cipher"}, +{ERR_REASON(SSL_R_NO_SRTP_PROFILES) ,"no srtp profiles"}, {ERR_REASON(SSL_R_NO_VERIFY_CALLBACK) ,"no verify callback"}, {ERR_REASON(SSL_R_NULL_SSL_CTX) ,"null ssl ctx"}, {ERR_REASON(SSL_R_NULL_SSL_METHOD_PASSED),"null ssl method passed"}, @@ -467,7 +503,12 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_SESSION_ID_CONTEXT_UNINITIALIZED),"session id context uninitialized"}, {ERR_REASON(SSL_R_SESSION_MAY_NOT_BE_CREATED),"session may not be created"}, {ERR_REASON(SSL_R_SHORT_READ) ,"short read"}, +{ERR_REASON(SSL_R_SIGNATURE_ALGORITHMS_ERROR),"signature algorithms error"}, {ERR_REASON(SSL_R_SIGNATURE_FOR_NON_SIGNING_CERTIFICATE),"signature for non signing certificate"}, +{ERR_REASON(SSL_R_SRP_A_CALC) ,"error with the srp params"}, +{ERR_REASON(SSL_R_SRTP_COULD_NOT_ALLOCATE_PROFILES),"srtp could not allocate profiles"}, +{ERR_REASON(SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG),"srtp protection profile list too long"}, +{ERR_REASON(SSL_R_SRTP_UNKNOWN_PROTECTION_PROFILE),"srtp unknown protection profile"}, {ERR_REASON(SSL_R_SSL23_DOING_SESSION_ID_REUSE),"ssl23 doing session id reuse"}, {ERR_REASON(SSL_R_SSL2_CONNECTION_ID_TOO_LONG),"ssl2 connection id too long"}, {ERR_REASON(SSL_R_SSL3_EXT_INVALID_ECPOINTFORMAT),"ssl3 ext invalid ecpointformat"}, @@ -512,6 +553,9 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_TLSV1_UNRECOGNIZED_NAME),"tlsv1 unrecognized name"}, {ERR_REASON(SSL_R_TLSV1_UNSUPPORTED_EXTENSION),"tlsv1 unsupported extension"}, {ERR_REASON(SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER),"tls client cert req with anon cipher"}, +{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbearts"}, +{ERR_REASON(SSL_R_TLS_HEARTBEAT_PENDING) ,"heartbeat request already pending"}, +{ERR_REASON(SSL_R_TLS_ILLEGAL_EXPORTER_LABEL),"tls illegal exporter label"}, {ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST),"tls invalid ecpointformat list"}, {ERR_REASON(SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST),"tls peer did not respond with certificate list"}, {ERR_REASON(SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG),"tls rsa encrypted value length is wrong"}, @@ -533,6 +577,7 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_UNKNOWN_CERTIFICATE_TYPE),"unknown certificate type"}, {ERR_REASON(SSL_R_UNKNOWN_CIPHER_RETURNED),"unknown cipher returned"}, {ERR_REASON(SSL_R_UNKNOWN_CIPHER_TYPE) ,"unknown cipher type"}, +{ERR_REASON(SSL_R_UNKNOWN_DIGEST) ,"unknown digest"}, {ERR_REASON(SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE),"unknown key exchange type"}, {ERR_REASON(SSL_R_UNKNOWN_PKEY_TYPE) ,"unknown pkey type"}, {ERR_REASON(SSL_R_UNKNOWN_PROTOCOL) ,"unknown protocol"}, @@ -547,16 +592,19 @@ static ERR_STRING_DATA SSL_str_reasons[]= {ERR_REASON(SSL_R_UNSUPPORTED_PROTOCOL) ,"unsupported protocol"}, {ERR_REASON(SSL_R_UNSUPPORTED_SSL_VERSION),"unsupported ssl version"}, {ERR_REASON(SSL_R_UNSUPPORTED_STATUS_TYPE),"unsupported status type"}, +{ERR_REASON(SSL_R_USE_SRTP_NOT_NEGOTIATED),"use srtp not negotiated"}, {ERR_REASON(SSL_R_WRITE_BIO_NOT_SET) ,"write bio not set"}, {ERR_REASON(SSL_R_WRONG_CIPHER_RETURNED) ,"wrong cipher returned"}, {ERR_REASON(SSL_R_WRONG_MESSAGE_TYPE) ,"wrong message type"}, {ERR_REASON(SSL_R_WRONG_NUMBER_OF_KEY_BITS),"wrong number of key bits"}, {ERR_REASON(SSL_R_WRONG_SIGNATURE_LENGTH),"wrong signature length"}, {ERR_REASON(SSL_R_WRONG_SIGNATURE_SIZE) ,"wrong signature size"}, +{ERR_REASON(SSL_R_WRONG_SIGNATURE_TYPE) ,"wrong signature type"}, {ERR_REASON(SSL_R_WRONG_SSL_VERSION) ,"wrong ssl version"}, {ERR_REASON(SSL_R_WRONG_VERSION_NUMBER) ,"wrong version number"}, {ERR_REASON(SSL_R_X509_LIB) ,"x509 lib"}, {ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification setup problems"}, +{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"}, {0,NULL} }; diff --git a/app/openssl/ssl/ssl_lib.c b/app/openssl/ssl/ssl_lib.c index b169ba93..8d2c3a76 100644 --- a/app/openssl/ssl/ssl_lib.c +++ b/app/openssl/ssl/ssl_lib.c @@ -176,7 +176,10 @@ SSL3_ENC_METHOD ssl3_undef_enc_method={ 0, /* client_finished_label_len */ NULL, /* server_finished_label */ 0, /* server_finished_label_len */ - (int (*)(int))ssl_undefined_function + (int (*)(int))ssl_undefined_function, + (int (*)(SSL *, unsigned char *, size_t, const char *, + size_t, const unsigned char *, size_t, + int use_context)) ssl_undefined_function, }; int SSL_clear(SSL *s) @@ -202,9 +205,9 @@ int SSL_clear(SSL *s) * needed because SSL_clear is not called when doing renegotiation) */ /* This is set if we are doing dynamic renegotiation so keep * the old cipher. It is sort of a SSL_clear_lite :-) */ - if (s->new_session) return(1); + if (s->renegotiate) return(1); #else - if (s->new_session) + if (s->renegotiate) { SSLerr(SSL_F_SSL_CLEAR,ERR_R_INTERNAL_ERROR); return 0; @@ -357,6 +360,17 @@ SSL *SSL_new(SSL_CTX *ctx) # ifndef OPENSSL_NO_NEXTPROTONEG s->next_proto_negotiated = NULL; # endif + + if (s->ctx->alpn_client_proto_list) + { + s->alpn_client_proto_list = + OPENSSL_malloc(s->ctx->alpn_client_proto_list_len); + if (s->alpn_client_proto_list == NULL) + goto err; + memcpy(s->alpn_client_proto_list, s->ctx->alpn_client_proto_list, + s->ctx->alpn_client_proto_list_len); + s->alpn_client_proto_list_len = s->ctx->alpn_client_proto_list_len; + } #endif s->verify_result=X509_V_OK; @@ -374,6 +388,13 @@ SSL *SSL_new(SSL_CTX *ctx) CRYPTO_new_ex_data(CRYPTO_EX_INDEX_SSL, s, &s->ex_data); #ifndef OPENSSL_NO_PSK + s->psk_identity_hint = NULL; + if (ctx->psk_identity_hint) + { + s->psk_identity_hint = BUF_strdup(ctx->psk_identity_hint); + if (s->psk_identity_hint == NULL) + goto err; + } s->psk_client_callback=ctx->psk_client_callback; s->psk_server_callback=ctx->psk_server_callback; #endif @@ -576,6 +597,15 @@ void SSL_free(SSL *s) sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, OCSP_RESPID_free); if (s->tlsext_ocsp_resp) OPENSSL_free(s->tlsext_ocsp_resp); + if (s->tlsext_channel_id_private) + EVP_PKEY_free(s->tlsext_channel_id_private); + if (s->alpn_client_proto_list) + OPENSSL_free(s->alpn_client_proto_list); +#endif + +#ifndef OPENSSL_NO_PSK + if (s->psk_identity_hint) + OPENSSL_free(s->psk_identity_hint); #endif if (s->client_CA != NULL) @@ -595,6 +625,11 @@ void SSL_free(SSL *s) OPENSSL_free(s->next_proto_negotiated); #endif +#ifndef OPENSSL_NO_SRTP + if (s->srtp_profiles) + sk_SRTP_PROTECTION_PROFILE_free(s->srtp_profiles); +#endif + OPENSSL_free(s); } @@ -1017,10 +1052,21 @@ int SSL_shutdown(SSL *s) int SSL_renegotiate(SSL *s) { - if (s->new_session == 0) - { - s->new_session=1; - } + if (s->renegotiate == 0) + s->renegotiate=1; + + s->new_session=1; + + return(s->method->ssl_renegotiate(s)); + } + +int SSL_renegotiate_abbreviated(SSL *s) + { + if (s->renegotiate == 0) + s->renegotiate=1; + + s->new_session=0; + return(s->method->ssl_renegotiate(s)); } @@ -1028,7 +1074,7 @@ int SSL_renegotiate_pending(SSL *s) { /* becomes true when negotiation is requested; * false again once a handshake has finished */ - return (s->new_session != 0); + return (s->renegotiate != 0); } long SSL_ctrl(SSL *s,int cmd,long larg,void *parg) @@ -1063,6 +1109,11 @@ long SSL_ctrl(SSL *s,int cmd,long larg,void *parg) s->max_cert_list=larg; return(l); case SSL_CTRL_SET_MTU: +#ifndef OPENSSL_NO_DTLS1 + if (larg < (long)dtls1_min_mtu()) + return 0; +#endif + if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) { @@ -1390,6 +1441,10 @@ int ssl_cipher_list_to_bytes(SSL *s,STACK_OF(SSL_CIPHER) *sk,unsigned char *p, for (i=0; ialgorithm_ssl & SSL_TLSV1_2) && + (TLS1_get_client_version(s) < TLS1_2_VERSION)) + continue; #ifndef OPENSSL_NO_KRB5 if (((c->algorithm_mkey & SSL_kKRB5) || (c->algorithm_auth & SSL_aKRB5)) && nokrb5) @@ -1397,7 +1452,7 @@ int ssl_cipher_list_to_bytes(SSL *s,STACK_OF(SSL_CIPHER) *sk,unsigned char *p, #endif /* OPENSSL_NO_KRB5 */ #ifndef OPENSSL_NO_PSK /* with PSK there must be client callback set */ - if (((c->algorithm_mkey & SSL_kPSK) || (c->algorithm_auth & SSL_aPSK)) && + if ((c->algorithm_auth & SSL_aPSK) && s->psk_client_callback == NULL) continue; #endif /* OPENSSL_NO_PSK */ @@ -1407,7 +1462,7 @@ int ssl_cipher_list_to_bytes(SSL *s,STACK_OF(SSL_CIPHER) *sk,unsigned char *p, /* If p == q, no ciphers and caller indicates an error. Otherwise * add SCSV if not renegotiating. */ - if (p != q && !s->new_session) + if (p != q && !s->renegotiate) { static SSL_CIPHER scsv = { @@ -1454,7 +1509,7 @@ STACK_OF(SSL_CIPHER) *ssl_bytes_to_cipher_list(SSL *s,unsigned char *p,int num, (p[n-1] == (SSL3_CK_SCSV & 0xff))) { /* SCSV fatal if renegotiating */ - if (s->new_session) + if (s->renegotiate) { SSLerr(SSL_F_SSL_BYTES_TO_CIPHER_LIST,SSL_R_SCSV_RECEIVED_WHEN_RENEGOTIATING); ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_HANDSHAKE_FAILURE); @@ -1627,10 +1682,93 @@ void SSL_CTX_set_next_proto_select_cb(SSL_CTX *ctx, int (*cb) (SSL *s, unsigned ctx->next_proto_select_cb = cb; ctx->next_proto_select_cb_arg = arg; } - # endif + +/* SSL_CTX_set_alpn_protos sets the ALPN protocol list on |ctx| to |protos|. + * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit + * length-prefixed strings). + * + * Returns 0 on success. */ +int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char* protos, + unsigned protos_len) + { + if (ctx->alpn_client_proto_list) + OPENSSL_free(ctx->alpn_client_proto_list); + + ctx->alpn_client_proto_list = OPENSSL_malloc(protos_len); + if (!ctx->alpn_client_proto_list) + return 1; + memcpy(ctx->alpn_client_proto_list, protos, protos_len); + ctx->alpn_client_proto_list_len = protos_len; + + return 0; + } + +/* SSL_set_alpn_protos sets the ALPN protocol list on |ssl| to |protos|. + * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit + * length-prefixed strings). + * + * Returns 0 on success. */ +int SSL_set_alpn_protos(SSL *ssl, const unsigned char* protos, + unsigned protos_len) + { + if (ssl->alpn_client_proto_list) + OPENSSL_free(ssl->alpn_client_proto_list); + + ssl->alpn_client_proto_list = OPENSSL_malloc(protos_len); + if (!ssl->alpn_client_proto_list) + return 1; + memcpy(ssl->alpn_client_proto_list, protos, protos_len); + ssl->alpn_client_proto_list_len = protos_len; + + return 0; + } + +/* SSL_CTX_set_alpn_select_cb sets a callback function on |ctx| that is called + * during ClientHello processing in order to select an ALPN protocol from the + * client's list of offered protocols. */ +void SSL_CTX_set_alpn_select_cb(SSL_CTX* ctx, + int (*cb) (SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg), + void *arg) + { + ctx->alpn_select_cb = cb; + ctx->alpn_select_cb_arg = arg; + } + +/* SSL_get0_alpn_selected gets the selected ALPN protocol (if any) from |ssl|. + * On return it sets |*data| to point to |*len| bytes of protocol name (not + * including the leading length-prefix byte). If the server didn't respond with + * a negotiated protocol then |*len| will be zero. */ +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned *len) + { + *data = NULL; + if (ssl->s3) + *data = ssl->s3->alpn_selected; + if (*data == NULL) + *len = 0; + else + *len = ssl->s3->alpn_selected_len; + } #endif +int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, + const char *label, size_t llen, const unsigned char *p, size_t plen, + int use_context) + { + if (s->version < TLS1_VERSION) + return -1; + + return s->method->ssl3_enc->export_keying_material(s, out, olen, label, + llen, p, plen, + use_context); + } + static unsigned long ssl_session_hash(const SSL_SESSION *a) { unsigned long l; @@ -1674,6 +1812,14 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) return(NULL); } +#ifdef OPENSSL_FIPS + if (FIPS_mode() && (meth->version < TLS1_VERSION)) + { + SSLerr(SSL_F_SSL_CTX_NEW, SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE); + return NULL; + } +#endif + if (SSL_get_ex_data_X509_STORE_CTX_idx() < 0) { SSLerr(SSL_F_SSL_CTX_NEW,SSL_R_X509_VERIFICATION_SETUP_PROBLEMS); @@ -1777,7 +1923,9 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) CRYPTO_new_ex_data(CRYPTO_EX_INDEX_SSL_CTX, ret, &ret->ex_data); ret->extra_certs=NULL; - ret->comp_methods=SSL_COMP_get_compression_methods(); + /* No compression for DTLS */ + if (meth->version != DTLS1_VERSION) + ret->comp_methods=SSL_COMP_get_compression_methods(); ret->max_send_fragment = SSL3_RT_MAX_PLAIN_LENGTH; @@ -1803,6 +1951,9 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) ret->psk_client_callback=NULL; ret->psk_server_callback=NULL; #endif +#ifndef OPENSSL_NO_SRP + SSL_CTX_SRP_CTX_init(ret); +#endif #ifndef OPENSSL_NO_BUF_FREELISTS ret->freelist_max_len = SSL_MAX_BUF_FREELIST_LEN_DEFAULT; ret->rbuf_freelist = OPENSSL_malloc(sizeof(SSL3_BUF_FREELIST)); @@ -1931,10 +2082,18 @@ void SSL_CTX_free(SSL_CTX *a) a->comp_methods = NULL; #endif +#ifndef OPENSSL_NO_SRTP + if (a->srtp_profiles) + sk_SRTP_PROTECTION_PROFILE_free(a->srtp_profiles); +#endif + #ifndef OPENSSL_NO_PSK if (a->psk_identity_hint) OPENSSL_free(a->psk_identity_hint); #endif +#ifndef OPENSSL_NO_SRP + SSL_CTX_SRP_CTX_free(a); +#endif #ifndef OPENSSL_NO_ENGINE if (a->client_cert_engine) ENGINE_finish(a->client_cert_engine); @@ -1947,6 +2106,13 @@ void SSL_CTX_free(SSL_CTX *a) ssl_buf_freelist_free(a->rbuf_freelist); #endif +#ifndef OPENSSL_NO_TLSEXT + if (a->tlsext_channel_id_private) + EVP_PKEY_free(a->tlsext_channel_id_private); + if (a->alpn_client_proto_list != NULL) + OPENSSL_free(a->alpn_client_proto_list); +#endif + OPENSSL_free(a); } @@ -2188,12 +2354,13 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) #ifndef OPENSSL_NO_EC -int ssl_check_srvr_ecc_cert_and_alg(X509 *x, const SSL_CIPHER *cs) +int ssl_check_srvr_ecc_cert_and_alg(X509 *x, SSL *s) { unsigned long alg_k, alg_a; EVP_PKEY *pkey = NULL; int keysize = 0; int signature_nid = 0, md_nid = 0, pk_nid = 0; + const SSL_CIPHER *cs = s->s3->tmp.new_cipher; alg_k = cs->algorithm_mkey; alg_a = cs->algorithm_auth; @@ -2223,7 +2390,7 @@ int ssl_check_srvr_ecc_cert_and_alg(X509 *x, const SSL_CIPHER *cs) SSLerr(SSL_F_SSL_CHECK_SRVR_ECC_CERT_AND_ALG, SSL_R_ECC_CERT_NOT_FOR_KEY_AGREEMENT); return 0; } - if (alg_k & SSL_kECDHe) + if ((alg_k & SSL_kECDHe) && TLS1_get_version(s) < TLS1_2_VERSION) { /* signature alg must be ECDSA */ if (pk_nid != NID_X9_62_id_ecPublicKey) @@ -2232,7 +2399,7 @@ int ssl_check_srvr_ecc_cert_and_alg(X509 *x, const SSL_CIPHER *cs) return 0; } } - if (alg_k & SSL_kECDHr) + if ((alg_k & SSL_kECDHr) && TLS1_get_version(s) < TLS1_2_VERSION) { /* signature alg must be RSA */ @@ -2259,7 +2426,7 @@ int ssl_check_srvr_ecc_cert_and_alg(X509 *x, const SSL_CIPHER *cs) #endif /* THIS NEEDS CLEANING UP */ -X509 *ssl_get_server_send_cert(SSL *s) +CERT_PKEY *ssl_get_server_send_pkey(const SSL *s) { unsigned long alg_k,alg_a; CERT *c; @@ -2314,15 +2481,23 @@ X509 *ssl_get_server_send_cert(SSL *s) i=SSL_PKEY_GOST01; else /* if (alg_a & SSL_aNULL) */ { - SSLerr(SSL_F_SSL_GET_SERVER_SEND_CERT,ERR_R_INTERNAL_ERROR); + SSLerr(SSL_F_SSL_GET_SERVER_SEND_PKEY,ERR_R_INTERNAL_ERROR); return(NULL); } - if (c->pkeys[i].x509 == NULL) return(NULL); - return(c->pkeys[i].x509); + return c->pkeys + i; + } + +X509 *ssl_get_server_send_cert(const SSL *s) + { + CERT_PKEY *cpk; + cpk = ssl_get_server_send_pkey(s); + if (!cpk) + return NULL; + return cpk->x509; } -EVP_PKEY *ssl_get_sign_pkey(SSL *s,const SSL_CIPHER *cipher) +EVP_PKEY *ssl_get_sign_pkey(SSL *s,const SSL_CIPHER *cipher, const EVP_MD **pmd) { unsigned long alg_a; CERT *c; @@ -2330,26 +2505,37 @@ EVP_PKEY *ssl_get_sign_pkey(SSL *s,const SSL_CIPHER *cipher) alg_a = cipher->algorithm_auth; c=s->cert; + /* SHA1 is the default for all signature algorithms up to TLS 1.2, + * except RSA which is handled specially in s3_srvr.c */ + if (pmd) + *pmd = EVP_sha1(); + if ((alg_a & SSL_aDSS) && - (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL)) - return(c->pkeys[SSL_PKEY_DSA_SIGN].privatekey); + (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL)) + { + if (pmd && s->s3 && s->s3->digest_dsa) + *pmd = s->s3->digest_dsa; + return c->pkeys[SSL_PKEY_DSA_SIGN].privatekey; + } else if (alg_a & SSL_aRSA) { + if (pmd && s->s3 && s->s3->digest_rsa) + *pmd = s->s3->digest_rsa; if (c->pkeys[SSL_PKEY_RSA_SIGN].privatekey != NULL) - return(c->pkeys[SSL_PKEY_RSA_SIGN].privatekey); - else if (c->pkeys[SSL_PKEY_RSA_ENC].privatekey != NULL) - return(c->pkeys[SSL_PKEY_RSA_ENC].privatekey); - else - return(NULL); + return c->pkeys[SSL_PKEY_RSA_SIGN].privatekey; + if (c->pkeys[SSL_PKEY_RSA_ENC].privatekey != NULL) + return c->pkeys[SSL_PKEY_RSA_ENC].privatekey; } else if ((alg_a & SSL_aECDSA) && (c->pkeys[SSL_PKEY_ECC].privatekey != NULL)) - return(c->pkeys[SSL_PKEY_ECC].privatekey); - else /* if (alg_a & SSL_aNULL) */ { - SSLerr(SSL_F_SSL_GET_SIGN_PKEY,ERR_R_INTERNAL_ERROR); - return(NULL); + if (pmd && s->s3 && s->s3->digest_ecdsa) + *pmd = s->s3->digest_ecdsa; + return c->pkeys[SSL_PKEY_ECC].privatekey; } + + SSLerr(SSL_F_SSL_GET_SIGN_PKEY,ERR_R_INTERNAL_ERROR); + return(NULL); } void ssl_update_cache(SSL *s,int mode) @@ -2574,7 +2760,11 @@ SSL_METHOD *ssl_bad_method(int ver) static const char *ssl_get_version(int version) { - if (version == TLS1_VERSION) + if (version == TLS1_2_VERSION) + return("TLSv1.2"); + else if (version == TLS1_1_VERSION) + return("TLSv1.1"); + else if (version == TLS1_VERSION) return("TLSv1"); else if (version == SSL3_VERSION) return("SSLv3"); @@ -2602,12 +2792,8 @@ const char* SSL_authentication_method(const SSL* ssl) { case SSL2_VERSION: return SSL_TXT_RSA; - case SSL3_VERSION: - case TLS1_VERSION: - case DTLS1_VERSION: - return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher); default: - return "UNKNOWN"; + return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher); } } @@ -2695,6 +2881,7 @@ SSL *SSL_dup(SSL *s) ret->in_handshake = s->in_handshake; ret->handshake_func = s->handshake_func; ret->server = s->server; + ret->renegotiate = s->renegotiate; ret->new_session = s->new_session; ret->quiet_shutdown = s->quiet_shutdown; ret->shutdown=s->shutdown; @@ -2960,6 +3147,11 @@ int SSL_state(const SSL *ssl) return(ssl->state); } +void SSL_set_state(SSL *ssl, int state) + { + ssl->state = state; + } + void SSL_set_verify_result(SSL *ssl,long arg) { ssl->verify_result=arg; @@ -3123,32 +3315,54 @@ int SSL_use_psk_identity_hint(SSL *s, const char *identity_hint) if (s == NULL) return 0; - if (s->session == NULL) - return 1; /* session not created yet, ignored */ - if (identity_hint != NULL && strlen(identity_hint) > PSK_MAX_IDENTITY_LEN) { SSLerr(SSL_F_SSL_USE_PSK_IDENTITY_HINT, SSL_R_DATA_LENGTH_TOO_LONG); return 0; } - if (s->session->psk_identity_hint != NULL) + + /* Clear hint in SSL and associated SSL_SESSION (if any). */ + if (s->psk_identity_hint != NULL) + { + OPENSSL_free(s->psk_identity_hint); + s->psk_identity_hint = NULL; + } + if (s->session != NULL && s->session->psk_identity_hint != NULL) + { OPENSSL_free(s->session->psk_identity_hint); + s->session->psk_identity_hint = NULL; + } + if (identity_hint != NULL) { - s->session->psk_identity_hint = BUF_strdup(identity_hint); - if (s->session->psk_identity_hint == NULL) - return 0; + /* The hint is stored in SSL and SSL_SESSION with the one in + * SSL_SESSION taking precedence. Thus, if SSL_SESSION is avaiable, + * we store the hint there, otherwise we store it in SSL. */ + if (s->session != NULL) + { + s->session->psk_identity_hint = BUF_strdup(identity_hint); + if (s->session->psk_identity_hint == NULL) + return 0; + } + else + { + s->psk_identity_hint = BUF_strdup(identity_hint); + if (s->psk_identity_hint == NULL) + return 0; + } } - else - s->session->psk_identity_hint = NULL; return 1; } const char *SSL_get_psk_identity_hint(const SSL *s) { - if (s == NULL || s->session == NULL) + if (s == NULL) return NULL; - return(s->session->psk_identity_hint); + /* The hint is stored in SSL and SSL_SESSION with the one in SSL_SESSION + * taking precedence. */ + if (s->session != NULL) + return(s->session->psk_identity_hint); + return(s->psk_identity_hint); } const char *SSL_get_psk_identity(const SSL *s) @@ -3231,6 +3445,16 @@ void ssl_clear_hash_ctx(EVP_MD_CTX **hash) *hash=NULL; } +void SSL_set_debug(SSL *s, int debug) + { + s->debug = debug; + } + +int SSL_cache_hit(SSL *s) + { + return s->hit; + } + #if defined(_WINDLL) && defined(OPENSSL_SYS_WIN16) #include "../crypto/bio/bss_file.c" #endif @@ -3239,4 +3463,3 @@ IMPLEMENT_STACK_OF(SSL_CIPHER) IMPLEMENT_STACK_OF(SSL_COMP) IMPLEMENT_OBJ_BSEARCH_GLOBAL_CMP_FN(SSL_CIPHER, SSL_CIPHER, ssl_cipher_id); - diff --git a/app/openssl/ssl/ssl_locl.h b/app/openssl/ssl/ssl_locl.h index 25f8e16c..f79ab009 100644 --- a/app/openssl/ssl/ssl_locl.h +++ b/app/openssl/ssl/ssl_locl.h @@ -170,7 +170,7 @@ # define OPENSSL_EXTERN OPENSSL_EXPORT #endif -#define PKCS1_CHECK +#undef PKCS1_CHECK #define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \ l|=(((unsigned long)(*((c)++)))<< 8), \ @@ -215,6 +215,15 @@ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ *((c)++)=(unsigned char)(((l) )&0xff)) +#define l2n8(l,c) (*((c)++)=(unsigned char)(((l)>>56)&0xff), \ + *((c)++)=(unsigned char)(((l)>>48)&0xff), \ + *((c)++)=(unsigned char)(((l)>>40)&0xff), \ + *((c)++)=(unsigned char)(((l)>>32)&0xff), \ + *((c)++)=(unsigned char)(((l)>>24)&0xff), \ + *((c)++)=(unsigned char)(((l)>>16)&0xff), \ + *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ + *((c)++)=(unsigned char)(((l) )&0xff)) + #define n2l6(c,l) (l =((BN_ULLONG)(*((c)++)))<<40, \ l|=((BN_ULLONG)(*((c)++)))<<32, \ l|=((BN_ULLONG)(*((c)++)))<<24, \ @@ -289,6 +298,7 @@ #define SSL_kEECDH 0x00000080L /* ephemeral ECDH */ #define SSL_kPSK 0x00000100L /* PSK */ #define SSL_kGOST 0x00000200L /* GOST key exchange */ +#define SSL_kSRP 0x00000400L /* SRP */ /* Bits for algorithm_auth (server authentication) */ #define SSL_aRSA 0x00000001L /* RSA auth */ @@ -316,21 +326,29 @@ #define SSL_CAMELLIA256 0x00000200L #define SSL_eGOST2814789CNT 0x00000400L #define SSL_SEED 0x00000800L +#define SSL_AES128GCM 0x00001000L +#define SSL_AES256GCM 0x00002000L -#define SSL_AES (SSL_AES128|SSL_AES256) +#define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL_AES256GCM) #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) /* Bits for algorithm_mac (symmetric authentication) */ + #define SSL_MD5 0x00000001L #define SSL_SHA1 0x00000002L #define SSL_GOST94 0x00000004L #define SSL_GOST89MAC 0x00000008L +#define SSL_SHA256 0x00000010L +#define SSL_SHA384 0x00000020L +/* Not a real MAC, just an indication it is part of cipher */ +#define SSL_AEAD 0x00000040L /* Bits for algorithm_ssl (protocol version) */ #define SSL_SSLV2 0x00000001L #define SSL_SSLV3 0x00000002L #define SSL_TLSV1 SSL_SSLV3 /* for now */ +#define SSL_TLSV1_2 0x00000004L /* Bits for algorithm2 (handshake digests and other extra flags) */ @@ -338,15 +356,21 @@ #define SSL_HANDSHAKE_MAC_MD5 0x10 #define SSL_HANDSHAKE_MAC_SHA 0x20 #define SSL_HANDSHAKE_MAC_GOST94 0x40 +#define SSL_HANDSHAKE_MAC_SHA256 0x80 +#define SSL_HANDSHAKE_MAC_SHA384 0x100 #define SSL_HANDSHAKE_MAC_DEFAULT (SSL_HANDSHAKE_MAC_MD5 | SSL_HANDSHAKE_MAC_SHA) /* When adding new digest in the ssl_ciph.c and increment SSM_MD_NUM_IDX * make sure to update this constant too */ -#define SSL_MAX_DIGEST 4 +#define SSL_MAX_DIGEST 6 + +#define TLS1_PRF_DGST_MASK (0xff << TLS1_PRF_DGST_SHIFT) -#define TLS1_PRF_DGST_SHIFT 8 +#define TLS1_PRF_DGST_SHIFT 10 #define TLS1_PRF_MD5 (SSL_HANDSHAKE_MAC_MD5 << TLS1_PRF_DGST_SHIFT) #define TLS1_PRF_SHA1 (SSL_HANDSHAKE_MAC_SHA << TLS1_PRF_DGST_SHIFT) +#define TLS1_PRF_SHA256 (SSL_HANDSHAKE_MAC_SHA256 << TLS1_PRF_DGST_SHIFT) +#define TLS1_PRF_SHA384 (SSL_HANDSHAKE_MAC_SHA384 << TLS1_PRF_DGST_SHIFT) #define TLS1_PRF_GOST94 (SSL_HANDSHAKE_MAC_GOST94 << TLS1_PRF_DGST_SHIFT) #define TLS1_PRF (TLS1_PRF_MD5 | TLS1_PRF_SHA1) @@ -354,6 +378,7 @@ * (currently this also goes into algorithm2) */ #define TLS1_STREAM_MAC 0x04 +#define TLSEXT_CHANNEL_ID_SIZE 128 /* @@ -555,6 +580,10 @@ typedef struct ssl3_enc_method const char *server_finished_label; int server_finished_label_len; int (*alert_value)(int); + int (*export_keying_material)(SSL *, unsigned char *, size_t, + const char *, size_t, + const unsigned char *, size_t, + int use_context); } SSL3_ENC_METHOD; #ifndef OPENSSL_NO_COMP @@ -592,11 +621,14 @@ extern SSL3_ENC_METHOD TLSv1_enc_data; extern SSL3_ENC_METHOD SSLv3_enc_data; extern SSL3_ENC_METHOD DTLSv1_enc_data; -#define IMPLEMENT_tls1_meth_func(func_name, s_accept, s_connect, s_get_meth) \ +#define SSL_IS_DTLS(s) (s->method->version == DTLS1_VERSION) + +#define IMPLEMENT_tls_meth_func(version, func_name, s_accept, s_connect, \ + s_get_meth) \ const SSL_METHOD *func_name(void) \ { \ static const SSL_METHOD func_name##_data= { \ - TLS1_VERSION, \ + version, \ tls1_new, \ tls1_clear, \ tls1_free, \ @@ -670,7 +702,7 @@ const SSL_METHOD *func_name(void) \ const SSL_METHOD *func_name(void) \ { \ static const SSL_METHOD func_name##_data= { \ - TLS1_VERSION, \ + TLS1_2_VERSION, \ tls1_new, \ tls1_clear, \ tls1_free, \ @@ -753,7 +785,7 @@ const SSL_METHOD *func_name(void) \ ssl3_read, \ ssl3_peek, \ ssl3_write, \ - ssl3_shutdown, \ + dtls1_shutdown, \ ssl3_renegotiate, \ ssl3_renegotiate_check, \ dtls1_get_message, \ @@ -809,13 +841,15 @@ int ssl_verify_cert_chain(SSL *s,STACK_OF(X509) *sk); int ssl_undefined_function(SSL *s); int ssl_undefined_void_function(void); int ssl_undefined_const_function(const SSL *s); -X509 *ssl_get_server_send_cert(SSL *); -EVP_PKEY *ssl_get_sign_pkey(SSL *,const SSL_CIPHER *); +CERT_PKEY *ssl_get_server_send_pkey(const SSL *s); +X509 *ssl_get_server_send_cert(const SSL *); +EVP_PKEY *ssl_get_sign_pkey(SSL *s,const SSL_CIPHER *c, const EVP_MD **pmd); int ssl_cert_type(X509 *x,EVP_PKEY *pkey); void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher); STACK_OF(SSL_CIPHER) *ssl_get_ciphers_by_id(SSL *s); int ssl_verify_alarm_type(long type); void ssl_load_ciphers(void); +int ssl_fill_hello_random(SSL *s, int server, unsigned char *field, int len); int ssl2_enc_init(SSL *s, int client); int ssl2_generate_key_material(SSL *s); @@ -944,6 +978,7 @@ void dtls1_get_ccs_header(unsigned char *data, struct ccs_header_st *ccs_hdr); void dtls1_reset_seq_numbers(SSL *s, int rw); long dtls1_default_timeout(void); struct timeval* dtls1_get_timeout(SSL *s, struct timeval* timeleft); +int dtls1_check_timeout_num(SSL *s); int dtls1_handle_timeout(SSL *s); const SSL_CIPHER *dtls1_get_cipher(unsigned int u); void dtls1_start_timer(SSL *s); @@ -951,6 +986,7 @@ void dtls1_stop_timer(SSL *s); int dtls1_is_timer_expired(SSL *s); void dtls1_double_timeout(SSL *s); int dtls1_send_newsession_ticket(SSL *s); +unsigned int dtls1_min_mtu(void); /* some client-only functions */ int ssl3_client_hello(SSL *s); @@ -970,6 +1006,7 @@ int ssl3_check_cert_and_algorithm(SSL *s); int ssl3_check_finished(SSL *s); # ifndef OPENSSL_NO_NEXTPROTONEG int ssl3_send_next_proto(SSL *s); +int ssl3_send_channel_id(SSL *s); # endif #endif @@ -992,6 +1029,7 @@ int ssl3_get_cert_verify(SSL *s); #ifndef OPENSSL_NO_NEXTPROTONEG int ssl3_get_next_proto(SSL *s); #endif +int ssl3_get_channel_id(SSL *s); int dtls1_send_hello_request(SSL *s); int dtls1_send_server_hello(SSL *s); @@ -1019,11 +1057,10 @@ int dtls1_connect(SSL *s); void dtls1_free(SSL *s); void dtls1_clear(SSL *s); long dtls1_ctrl(SSL *s,int cmd, long larg, void *parg); +int dtls1_shutdown(SSL *s); long dtls1_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok); int dtls1_get_record(SSL *s); -int do_dtls1_write(SSL *s, int type, const unsigned char *buf, - unsigned int len, int create_empty_fragement); int dtls1_dispatch_alert(SSL *s); int dtls1_enc(SSL *s, int snd); @@ -1039,12 +1076,15 @@ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p); int tls1_mac(SSL *ssl, unsigned char *md, int snd); int tls1_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, int len); +int tls1_export_keying_material(SSL *s, unsigned char *out, size_t olen, + const char *label, size_t llen, + const unsigned char *p, size_t plen, int use_context); int tls1_alert_code(int code); int ssl3_alert_code(int code); int ssl_ok(SSL *s); #ifndef OPENSSL_NO_ECDH -int ssl_check_srvr_ecc_cert_and_alg(X509 *x, const SSL_CIPHER *cs); +int ssl_check_srvr_ecc_cert_and_alg(X509 *x, SSL *s); #endif SSL_COMP *ssl3_comp_find(STACK_OF(SSL_COMP) *sk, int n); @@ -1061,9 +1101,17 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **data, unsigned char *d, int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **data, unsigned char *d, int n, int *al); int ssl_prepare_clienthello_tlsext(SSL *s); int ssl_prepare_serverhello_tlsext(SSL *s); -int ssl_check_clienthello_tlsext(SSL *s); +int ssl_check_clienthello_tlsext_early(SSL *s); +int ssl_check_clienthello_tlsext_late(SSL *s); int ssl_check_serverhello_tlsext(SSL *s); +#ifndef OPENSSL_NO_HEARTBEATS +int tls1_heartbeat(SSL *s); +int dtls1_heartbeat(SSL *s); +int tls1_process_heartbeat(SSL *s); +int dtls1_process_heartbeat(SSL *s); +#endif + #ifdef OPENSSL_NO_SHA256 #define tlsext_tick_md EVP_sha1 #else @@ -1071,7 +1119,15 @@ int ssl_check_serverhello_tlsext(SSL *s); #endif int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, const unsigned char *limit, SSL_SESSION **ret); + +int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk, + const EVP_MD *md); +int tls12_get_sigid(const EVP_PKEY *pk); +const EVP_MD *tls12_get_hash(unsigned char hash_alg); + +int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s); #endif + EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ; void ssl_clear_hash_ctx(EVP_MD_CTX **hash); int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len, @@ -1082,4 +1138,42 @@ int ssl_add_clienthello_renegotiate_ext(SSL *s, unsigned char *p, int *len, int maxlen); int ssl_parse_clienthello_renegotiate_ext(SSL *s, unsigned char *d, int len, int *al); +long ssl_get_algorithm2(SSL *s); +void tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize); +int tls12_get_req_sig_algs(SSL *s, unsigned char *p); + +int ssl_add_clienthello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen); +int ssl_parse_clienthello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al); +int ssl_add_serverhello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen); +int ssl_parse_serverhello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al); + +/* s3_cbc.c */ +void ssl3_cbc_copy_mac(unsigned char* out, + const SSL3_RECORD *rec, + unsigned md_size,unsigned orig_len); +int ssl3_cbc_remove_padding(const SSL* s, + SSL3_RECORD *rec, + unsigned block_size, + unsigned mac_size); +int tls1_cbc_remove_padding(const SSL* s, + SSL3_RECORD *rec, + unsigned block_size, + unsigned mac_size); +char ssl3_cbc_record_digest_supported(const EVP_MD_CTX *ctx); +void ssl3_cbc_digest_record( + const EVP_MD_CTX *ctx, + unsigned char* md_out, + size_t* md_out_size, + const unsigned char header[13], + const unsigned char *data, + size_t data_plus_mac_size, + size_t data_plus_mac_plus_padding_size, + const unsigned char *mac_secret, + unsigned mac_secret_length, + char is_sslv3); + +void tls_fips_digest_extra( + const EVP_CIPHER_CTX *cipher_ctx, EVP_MD_CTX *mac_ctx, + const unsigned char *data, size_t data_len, size_t orig_len); + #endif diff --git a/app/openssl/ssl/ssl_rsa.c b/app/openssl/ssl/ssl_rsa.c index c43f3e2a..e98e862d 100644 --- a/app/openssl/ssl/ssl_rsa.c +++ b/app/openssl/ssl/ssl_rsa.c @@ -714,6 +714,8 @@ int SSL_use_certificate_chain(SSL *ssl, STACK_OF(X509) *cert_chain) SSLerr(SSL_F_SSL_USE_CERTIFICATE_CHAIN,SSL_R_NO_CERTIFICATE_ASSIGNED); return(0); } + if (ssl->cert->key->cert_chain != NULL) + sk_X509_pop_free(ssl->cert->key->cert_chain, X509_free); ssl->cert->key->cert_chain = cert_chain; return(1); } @@ -746,7 +748,7 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file) ERR_clear_error(); /* clear error stack for SSL_CTX_use_certificate() */ - in=BIO_new(BIO_s_file_internal()); + in = BIO_new(BIO_s_file_internal()); if (in == NULL) { SSLerr(SSL_F_SSL_CTX_USE_CERTIFICATE_CHAIN_FILE,ERR_R_BUF_LIB); @@ -759,14 +761,16 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file) goto end; } - x=PEM_read_bio_X509_AUX(in,NULL,ctx->default_passwd_callback,ctx->default_passwd_callback_userdata); + x=PEM_read_bio_X509_AUX(in,NULL,ctx->default_passwd_callback, + ctx->default_passwd_callback_userdata); if (x == NULL) { SSLerr(SSL_F_SSL_CTX_USE_CERTIFICATE_CHAIN_FILE,ERR_R_PEM_LIB); goto end; } - ret=SSL_CTX_use_certificate(ctx,x); + ret = SSL_CTX_use_certificate(ctx, x); + if (ERR_peek_error() != 0) ret = 0; /* Key/certificate mismatch doesn't imply ret==0 ... */ if (ret) @@ -778,13 +782,15 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file) int r; unsigned long err; - if (ctx->extra_certs != NULL) + if (ctx->extra_certs != NULL) { sk_X509_pop_free(ctx->extra_certs, X509_free); ctx->extra_certs = NULL; } - while ((ca = PEM_read_bio_X509(in,NULL,ctx->default_passwd_callback,ctx->default_passwd_callback_userdata)) + while ((ca = PEM_read_bio_X509(in, NULL, + ctx->default_passwd_callback, + ctx->default_passwd_callback_userdata)) != NULL) { r = SSL_CTX_add_extra_chain_cert(ctx, ca); diff --git a/app/openssl/ssl/ssl_sess.c b/app/openssl/ssl/ssl_sess.c index 93954e48..ec088404 100644 --- a/app/openssl/ssl/ssl_sess.c +++ b/app/openssl/ssl/ssl_sess.c @@ -217,6 +217,9 @@ SSL_SESSION *SSL_SESSION_new(void) #ifndef OPENSSL_NO_PSK ss->psk_identity_hint=NULL; ss->psk_identity=NULL; +#endif +#ifndef OPENSSL_NO_SRP + ss->srp_username=NULL; #endif return(ss); } @@ -228,6 +231,11 @@ const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, unsigned int *len) return s->session_id; } +unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s) + { + return s->compress_meth; + } + /* Even with SSLv2, we have 16 bytes (128 bits) of session ID space. SSLv3/TLSv1 * has 32 bytes (256 bits). As such, filling the ID with random gunk repeatedly * until we have no conflict is going to complete in one iteration pretty much @@ -307,6 +315,16 @@ int ssl_get_new_session(SSL *s, int session) ss->ssl_version=TLS1_VERSION; ss->session_id_length=SSL3_SSL_SESSION_ID_LENGTH; } + else if (s->version == TLS1_1_VERSION) + { + ss->ssl_version=TLS1_1_VERSION; + ss->session_id_length=SSL3_SSL_SESSION_ID_LENGTH; + } + else if (s->version == TLS1_2_VERSION) + { + ss->ssl_version=TLS1_2_VERSION; + ss->session_id_length=SSL3_SSL_SESSION_ID_LENGTH; + } else if (s->version == DTLS1_BAD_VER) { ss->ssl_version=DTLS1_BAD_VER; @@ -408,6 +426,18 @@ int ssl_get_new_session(SSL *s, int session) memcpy(ss->tlsext_ellipticcurvelist, s->tlsext_ellipticcurvelist, s->tlsext_ellipticcurvelist_length); } #endif +#endif +#ifndef OPENSSL_NO_PSK + if (s->psk_identity_hint) + { + ss->psk_identity_hint = BUF_strdup(s->psk_identity_hint); + if (ss->psk_identity_hint == NULL) + { + SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE); + SSL_SESSION_free(ss); + return 0; + } + } #endif } else @@ -430,6 +460,25 @@ int ssl_get_new_session(SSL *s, int session) return(1); } +/* ssl_get_prev attempts to find an SSL_SESSION to be used to resume this + * connection. It is only called by servers. + * + * session_id: points at the session ID in the ClientHello. This code will + * read past the end of this in order to parse out the session ticket + * extension, if any. + * len: the length of the session ID. + * limit: a pointer to the first byte after the ClientHello. + * + * Returns: + * -1: error + * 0: a session may have been found. + * + * Side effects: + * - If a session is found then s->session is pointed at it (after freeing an + * existing session if need be) and s->verify_result is set from the session. + * - Both for new and resumed sessions, s->tlsext_ticket_expected is set to 1 + * if the server should issue a new session ticket (to 0 otherwise). + */ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, const unsigned char *limit) { @@ -437,27 +486,39 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, SSL_SESSION *ret=NULL; int fatal = 0; + int try_session_cache = 1; #ifndef OPENSSL_NO_TLSEXT int r; #endif if (len > SSL_MAX_SSL_SESSION_ID_LENGTH) goto err; + + if (len == 0) + try_session_cache = 0; + #ifndef OPENSSL_NO_TLSEXT - r = tls1_process_ticket(s, session_id, len, limit, &ret); - if (r == -1) + r = tls1_process_ticket(s, session_id, len, limit, &ret); /* sets s->tlsext_ticket_expected */ + switch (r) { + case -1: /* Error during processing */ fatal = 1; goto err; + case 0: /* No ticket found */ + case 1: /* Zero length ticket found */ + break; /* Ok to carry on processing session id. */ + case 2: /* Ticket found but not decrypted. */ + case 3: /* Ticket decrypted, *ret has been set. */ + try_session_cache = 0; + break; + default: + abort(); } - else if (r == 0 || (!ret && !len)) - goto err; - else if (!ret && !(s->session_ctx->session_cache_mode & SSL_SESS_CACHE_NO_INTERNAL_LOOKUP)) -#else - if (len == 0) - goto err; - if (!(s->session_ctx->session_cache_mode & SSL_SESS_CACHE_NO_INTERNAL_LOOKUP)) #endif + + if (try_session_cache && + ret == NULL && + !(s->session_ctx->session_cache_mode & SSL_SESS_CACHE_NO_INTERNAL_LOOKUP)) { SSL_SESSION data; data.ssl_version=s->version; @@ -468,20 +529,22 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, CRYPTO_r_lock(CRYPTO_LOCK_SSL_CTX); ret=lh_SSL_SESSION_retrieve(s->session_ctx->sessions,&data); if (ret != NULL) - /* don't allow other threads to steal it: */ - CRYPTO_add(&ret->references,1,CRYPTO_LOCK_SSL_SESSION); + { + /* don't allow other threads to steal it: */ + CRYPTO_add(&ret->references,1,CRYPTO_LOCK_SSL_SESSION); + } CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX); + if (ret == NULL) + s->session_ctx->stats.sess_miss++; } - if (ret == NULL) + if (try_session_cache && + ret == NULL && + s->session_ctx->get_session_cb != NULL) { int copy=1; - s->session_ctx->stats.sess_miss++; - ret=NULL; - if (s->session_ctx->get_session_cb != NULL - && (ret=s->session_ctx->get_session_cb(s,session_id,len,©)) - != NULL) + if ((ret=s->session_ctx->get_session_cb(s,session_id,len,©))) { s->session_ctx->stats.sess_cb_hit++; @@ -500,23 +563,18 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, * things are very strange */ SSL_CTX_add_session(s->session_ctx,ret); } - if (ret == NULL) - goto err; } - /* Now ret is non-NULL, and we own one of its reference counts. */ + if (ret == NULL) + goto err; + + /* Now ret is non-NULL and we own one of its reference counts. */ if (ret->sid_ctx_length != s->sid_ctx_length || memcmp(ret->sid_ctx,s->sid_ctx,ret->sid_ctx_length)) { - /* We've found the session named by the client, but we don't + /* We have the session requested by the client, but we don't * want to use it in this context. */ - -#if 0 /* The client cannot always know when a session is not appropriate, - * so we shouldn't generate an error message. */ - - SSLerr(SSL_F_SSL_GET_PREV_SESSION,SSL_R_ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT); -#endif goto err; /* treat like cache miss */ } @@ -553,39 +611,38 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, goto err; } - -#if 0 /* This is way too late. */ - - /* If a thread got the session, then 'swaped', and another got - * it and then due to a time-out decided to 'OPENSSL_free' it we could - * be in trouble. So I'll increment it now, then double decrement - * later - am I speaking rubbish?. */ - CRYPTO_add(&ret->references,1,CRYPTO_LOCK_SSL_SESSION); -#endif - if (ret->timeout < (long)(time(NULL) - ret->time)) /* timeout */ { s->session_ctx->stats.sess_timeout++; - /* remove it from the cache */ - SSL_CTX_remove_session(s->session_ctx,ret); + if (try_session_cache) + { + /* session was from the cache, so remove it */ + SSL_CTX_remove_session(s->session_ctx,ret); + } goto err; } s->session_ctx->stats.sess_hit++; - /* ret->time=time(NULL); */ /* rezero timeout? */ - /* again, just leave the session - * if it is the same session, we have just incremented and - * then decremented the reference count :-) */ if (s->session != NULL) SSL_SESSION_free(s->session); s->session=ret; s->verify_result = s->session->verify_result; - return(1); + return 1; err: if (ret != NULL) + { SSL_SESSION_free(ret); +#ifndef OPENSSL_NO_TLSEXT + if (!try_session_cache) + { + /* The session was from a ticket, so we should + * issue a ticket for the new session */ + s->tlsext_ticket_expected = 1; + } +#endif + } if (fatal) return -1; else @@ -735,6 +792,10 @@ void SSL_SESSION_free(SSL_SESSION *ss) OPENSSL_free(ss->psk_identity_hint); if (ss->psk_identity != NULL) OPENSSL_free(ss->psk_identity); +#endif +#ifndef OPENSSL_NO_SRP + if (ss->srp_username != NULL) + OPENSSL_free(ss->srp_username); #endif OPENSSL_cleanse(ss,sizeof(*ss)); OPENSSL_free(ss); @@ -760,10 +821,6 @@ int SSL_set_session(SSL *s, SSL_SESSION *session) { if (!SSL_set_ssl_method(s,meth)) return(0); - if (s->ctx->session_timeout == 0) - session->timeout=SSL_get_default_timeout(s); - else - session->timeout=s->ctx->session_timeout; } #ifndef OPENSSL_NO_KRB5 @@ -831,6 +888,25 @@ long SSL_SESSION_set_time(SSL_SESSION *s, long t) return(t); } +X509 *SSL_SESSION_get0_peer(SSL_SESSION *s) + { + return s->peer; + } + +int SSL_SESSION_set1_id_context(SSL_SESSION *s,const unsigned char *sid_ctx, + unsigned int sid_ctx_len) + { + if(sid_ctx_len > SSL_MAX_SID_CTX_LENGTH) + { + SSLerr(SSL_F_SSL_SESSION_SET1_ID_CONTEXT,SSL_R_SSL_SESSION_ID_CONTEXT_TOO_LONG); + return 0; + } + s->sid_ctx_length=sid_ctx_len; + memcpy(s->sid_ctx,sid_ctx,sid_ctx_len); + + return 1; + } + long SSL_CTX_set_timeout(SSL_CTX *s, long t) { long l; diff --git a/app/openssl/ssl/ssl_txt.c b/app/openssl/ssl/ssl_txt.c index 3122440e..6479d52c 100644 --- a/app/openssl/ssl/ssl_txt.c +++ b/app/openssl/ssl/ssl_txt.c @@ -115,6 +115,10 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x) s="SSLv2"; else if (x->ssl_version == SSL3_VERSION) s="SSLv3"; + else if (x->ssl_version == TLS1_2_VERSION) + s="TLSv1.2"; + else if (x->ssl_version == TLS1_1_VERSION) + s="TLSv1.1"; else if (x->ssl_version == TLS1_VERSION) s="TLSv1"; else if (x->ssl_version == DTLS1_VERSION) @@ -187,6 +191,10 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x) if (BIO_puts(bp,"\n PSK identity hint: ") <= 0) goto err; if (BIO_printf(bp, "%s", x->psk_identity_hint ? x->psk_identity_hint : "None") <= 0) goto err; #endif +#ifndef OPENSSL_NO_SRP + if (BIO_puts(bp,"\n SRP username: ") <= 0) goto err; + if (BIO_printf(bp, "%s", x->srp_username ? x->srp_username : "None") <= 0) goto err; +#endif #ifndef OPENSSL_NO_TLSEXT if (x->tlsext_tick_lifetime_hint) { diff --git a/app/openssl/ssl/ssltest.c b/app/openssl/ssl/ssltest.c index f6a2c79d..28fa223f 100644 --- a/app/openssl/ssl/ssltest.c +++ b/app/openssl/ssl/ssltest.c @@ -181,6 +181,9 @@ #ifndef OPENSSL_NO_DH #include #endif +#ifndef OPENSSL_NO_SRP +#include +#endif #include #define _XOPEN_SOURCE_EXTENDED 1 /* Or gethostname won't be declared properly @@ -246,6 +249,49 @@ static unsigned int psk_server_callback(SSL *ssl, const char *identity, unsigned unsigned int max_psk_len); #endif +#ifndef OPENSSL_NO_SRP +/* SRP client */ +/* This is a context that we pass to all callbacks */ +typedef struct srp_client_arg_st + { + char *srppassin; + char *srplogin; + } SRP_CLIENT_ARG; + +#define PWD_STRLEN 1024 + +static char * MS_CALLBACK ssl_give_srp_client_pwd_cb(SSL *s, void *arg) + { + SRP_CLIENT_ARG *srp_client_arg = (SRP_CLIENT_ARG *)arg; + return BUF_strdup((char *)srp_client_arg->srppassin); + } + +/* SRP server */ +/* This is a context that we pass to SRP server callbacks */ +typedef struct srp_server_arg_st + { + char *expected_user; + char *pass; + } SRP_SERVER_ARG; + +static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) + { + SRP_SERVER_ARG * p = (SRP_SERVER_ARG *) arg; + + if (strcmp(p->expected_user, SSL_get_srp_username(s)) != 0) + { + fprintf(stderr, "User %s doesn't exist\n", SSL_get_srp_username(s)); + return SSL3_AL_FATAL; + } + if (SSL_set_srp_server_param_pw(s,p->expected_user,p->pass,"1024")<0) + { + *ad = SSL_AD_INTERNAL_ERROR; + return SSL3_AL_FATAL; + } + return SSL_ERROR_NONE; + } +#endif + static BIO *bio_err=NULL; static BIO *bio_stdout=NULL; @@ -268,6 +314,9 @@ static void sv_usage(void) { fprintf(stderr,"usage: ssltest [args ...]\n"); fprintf(stderr,"\n"); +#ifdef OPENSSL_FIPS + fprintf(stderr,"-F - run test in FIPS mode\n"); +#endif fprintf(stderr," -server_auth - check server certificate\n"); fprintf(stderr," -client_auth - do client authentication\n"); fprintf(stderr," -proxy - allow proxy certificates\n"); @@ -289,6 +338,10 @@ static void sv_usage(void) #ifndef OPENSSL_NO_PSK fprintf(stderr," -psk arg - PSK in hex (without 0x)\n"); #endif +#ifndef OPENSSL_NO_SRP + fprintf(stderr," -srpuser user - SRP username to use\n"); + fprintf(stderr," -srppass arg - password for 'user'\n"); +#endif #ifndef OPENSSL_NO_SSL2 fprintf(stderr," -ssl2 - use SSLv2\n"); #endif @@ -316,8 +369,6 @@ static void sv_usage(void) " (default is sect163r2).\n"); #endif fprintf(stderr," -test_cipherlist - verifies the order of the ssl cipher lists\n"); - fprintf(stderr," -c_small_records - enable client side use of small SSL record buffers\n"); - fprintf(stderr," -s_small_records - enable server side use of small SSL record buffers\n"); fprintf(stderr," -cutthrough - enable 1-RTT full-handshake for strong ciphers\n"); } @@ -447,10 +498,6 @@ int opaque_prf_input_cb(SSL *ssl, void *peerinput, size_t len, void *arg_) return arg->ret; } #endif - int ssl_mode = 0; - int c_small_records=0; - int s_small_records=0; - int cutthrough = 0; int main(int argc, char *argv[]) { @@ -482,6 +529,12 @@ int main(int argc, char *argv[]) #endif #ifndef OPENSSL_NO_ECDH EC_KEY *ecdh = NULL; +#endif +#ifndef OPENSSL_NO_SRP + /* client */ + SRP_CLIENT_ARG srp_client_arg = {NULL,NULL}; + /* server */ + SRP_SERVER_ARG srp_server_arg = {NULL,NULL}; #endif int no_dhe = 0; int no_ecdhe = 0; @@ -491,9 +544,13 @@ int main(int argc, char *argv[]) int comp = 0; #ifndef OPENSSL_NO_COMP COMP_METHOD *cm = NULL; -#endif STACK_OF(SSL_COMP) *ssl_comp_methods = NULL; +#endif int test_cipherlist = 0; +#ifdef OPENSSL_FIPS + int fips_mode=0; +#endif + int cutthrough = 0; verbose = 0; debug = 0; @@ -525,7 +582,16 @@ int main(int argc, char *argv[]) while (argc >= 1) { - if (strcmp(*argv,"-server_auth") == 0) + if(!strcmp(*argv,"-F")) + { +#ifdef OPENSSL_FIPS + fips_mode=1; +#else + fprintf(stderr,"not compiled with FIPS support, so exitting without running.\n"); + EXIT(0); +#endif + } + else if (strcmp(*argv,"-server_auth") == 0) server_auth=1; else if (strcmp(*argv,"-client_auth") == 0) client_auth=1; @@ -579,6 +645,20 @@ int main(int argc, char *argv[]) no_psk=1; #endif } +#ifndef OPENSSL_NO_SRP + else if (strcmp(*argv,"-srpuser") == 0) + { + if (--argc < 1) goto bad; + srp_server_arg.expected_user = srp_client_arg.srplogin= *(++argv); + tls1=1; + } + else if (strcmp(*argv,"-srppass") == 0) + { + if (--argc < 1) goto bad; + srp_server_arg.pass = srp_client_arg.srppassin= *(++argv); + tls1=1; + } +#endif else if (strcmp(*argv,"-ssl2") == 0) ssl2=1; else if (strcmp(*argv,"-tls1") == 0) @@ -687,14 +767,6 @@ int main(int argc, char *argv[]) { test_cipherlist = 1; } - else if (strcmp(*argv, "-c_small_records") == 0) - { - c_small_records = 1; - } - else if (strcmp(*argv, "-s_small_records") == 0) - { - s_small_records = 1; - } else if (strcmp(*argv, "-cutthrough") == 0) { cutthrough = 1; @@ -733,6 +805,20 @@ bad: EXIT(1); } +#ifdef OPENSSL_FIPS + if(fips_mode) + { + if(!FIPS_mode_set(1)) + { + ERR_load_crypto_strings(); + ERR_print_errors(BIO_new_fp(stderr,BIO_NOCLOSE)); + EXIT(1); + } + else + fprintf(stderr,"*** IN FIPS MODE ***\n"); + } +#endif + if (print_time) { if (!bio_pair) @@ -801,7 +887,13 @@ bad: meth=SSLv23_method(); #else #ifdef OPENSSL_NO_SSL2 - meth=SSLv3_method(); + if (tls1) + meth=TLSv1_method(); + else + if (ssl3) + meth=SSLv3_method(); + else + meth=SSLv23_method(); #else meth=SSLv2_method(); #endif @@ -820,26 +912,10 @@ bad: SSL_CTX_set_cipher_list(c_ctx,cipher); SSL_CTX_set_cipher_list(s_ctx,cipher); } - - ssl_mode = 0; - if (c_small_records) - { - ssl_mode = SSL_CTX_get_mode(c_ctx); - ssl_mode |= SSL_MODE_SMALL_BUFFERS; - SSL_CTX_set_mode(c_ctx, ssl_mode); - } - ssl_mode = 0; - if (s_small_records) - { - ssl_mode = SSL_CTX_get_mode(s_ctx); - ssl_mode |= SSL_MODE_SMALL_BUFFERS; - SSL_CTX_set_mode(s_ctx, ssl_mode); - } - ssl_mode = 0; if (cutthrough) { - ssl_mode = SSL_CTX_get_mode(c_ctx); - ssl_mode = SSL_MODE_HANDSHAKE_CUTTHROUGH; + int ssl_mode = SSL_CTX_get_mode(c_ctx); + ssl_mode |= SSL_MODE_HANDSHAKE_CUTTHROUGH; SSL_CTX_set_mode(c_ctx, ssl_mode); } @@ -878,7 +954,11 @@ bad: } } else +#ifdef OPENSSL_NO_EC2M + nid = NID_X9_62_prime256v1; +#else nid = NID_sect163r2; +#endif ecdh = EC_KEY_new_by_curve_name(nid); if (ecdh == NULL) @@ -981,6 +1061,26 @@ bad: } #endif } +#ifndef OPENSSL_NO_SRP + if (srp_client_arg.srplogin) + { + if (!SSL_CTX_set_srp_username(c_ctx, srp_client_arg.srplogin)) + { + BIO_printf(bio_err,"Unable to set SRP username\n"); + goto end; + } + SSL_CTX_set_srp_cb_arg(c_ctx,&srp_client_arg); + SSL_CTX_set_srp_client_pwd_callback(c_ctx, ssl_give_srp_client_pwd_cb); + /*SSL_CTX_set_srp_strength(c_ctx, srp_client_arg.strength);*/ + } + + if (srp_server_arg.expected_user != NULL) + { + SSL_CTX_set_verify(s_ctx,SSL_VERIFY_NONE,verify_callback); + SSL_CTX_set_srp_cb_arg(s_ctx, &srp_server_arg); + SSL_CTX_set_srp_username_callback(s_ctx, ssl_srp_server_param_cb); + } +#endif c_ssl=SSL_new(c_ctx); s_ssl=SSL_new(s_ctx); @@ -2205,15 +2305,7 @@ static int MS_CALLBACK app_verify_callback(X509_STORE_CTX *ctx, void *arg) } #ifndef OPENSSL_NO_X509_VERIFY -# ifdef OPENSSL_FIPS - if(s->version == TLS1_VERSION) - FIPS_allow_md5(1); -# endif ok = X509_verify_cert(ctx); -# ifdef OPENSSL_FIPS - if(s->version == TLS1_VERSION) - FIPS_allow_md5(0); -# endif #endif if (cb_arg->proxy_auth) diff --git a/app/openssl/ssl/t1_clnt.c b/app/openssl/ssl/t1_clnt.c index c87af177..578617ed 100644 --- a/app/openssl/ssl/t1_clnt.c +++ b/app/openssl/ssl/t1_clnt.c @@ -66,13 +66,26 @@ static const SSL_METHOD *tls1_get_client_method(int ver); static const SSL_METHOD *tls1_get_client_method(int ver) { + if (ver == TLS1_2_VERSION) + return TLSv1_2_client_method(); + if (ver == TLS1_1_VERSION) + return TLSv1_1_client_method(); if (ver == TLS1_VERSION) - return(TLSv1_client_method()); - else - return(NULL); + return TLSv1_client_method(); + return NULL; } -IMPLEMENT_tls1_meth_func(TLSv1_client_method, +IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_client_method, + ssl_undefined_function, + ssl3_connect, + tls1_get_client_method) + +IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_client_method, + ssl_undefined_function, + ssl3_connect, + tls1_get_client_method) + +IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_client_method, ssl_undefined_function, ssl3_connect, tls1_get_client_method) diff --git a/app/openssl/ssl/t1_enc.c b/app/openssl/ssl/t1_enc.c index 793ea43e..2ed2e076 100644 --- a/app/openssl/ssl/t1_enc.c +++ b/app/openssl/ssl/t1_enc.c @@ -143,6 +143,7 @@ #include #include #include +#include #ifdef KSSL_DEBUG #include #endif @@ -158,68 +159,75 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, unsigned char *out, int olen) { int chunk; - unsigned int j; - HMAC_CTX ctx; - HMAC_CTX ctx_tmp; + size_t j; + EVP_MD_CTX ctx, ctx_tmp; + EVP_PKEY *mac_key; unsigned char A1[EVP_MAX_MD_SIZE]; - unsigned int A1_len; + size_t A1_len; int ret = 0; chunk=EVP_MD_size(md); OPENSSL_assert(chunk >= 0); - HMAC_CTX_init(&ctx); - HMAC_CTX_init(&ctx_tmp); - if (!HMAC_Init_ex(&ctx,sec,sec_len,md, NULL)) + EVP_MD_CTX_init(&ctx); + EVP_MD_CTX_init(&ctx_tmp); + EVP_MD_CTX_set_flags(&ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + EVP_MD_CTX_set_flags(&ctx_tmp, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + mac_key = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, NULL, sec, sec_len); + if (!mac_key) goto err; - if (!HMAC_Init_ex(&ctx_tmp,sec,sec_len,md, NULL)) + if (!EVP_DigestSignInit(&ctx,NULL,md, NULL, mac_key)) goto err; - if (seed1 != NULL && !HMAC_Update(&ctx,seed1,seed1_len)) + if (!EVP_DigestSignInit(&ctx_tmp,NULL,md, NULL, mac_key)) goto err; - if (seed2 != NULL && !HMAC_Update(&ctx,seed2,seed2_len)) + if (seed1 && !EVP_DigestSignUpdate(&ctx,seed1,seed1_len)) goto err; - if (seed3 != NULL && !HMAC_Update(&ctx,seed3,seed3_len)) + if (seed2 && !EVP_DigestSignUpdate(&ctx,seed2,seed2_len)) goto err; - if (seed4 != NULL && !HMAC_Update(&ctx,seed4,seed4_len)) + if (seed3 && !EVP_DigestSignUpdate(&ctx,seed3,seed3_len)) goto err; - if (seed5 != NULL && !HMAC_Update(&ctx,seed5,seed5_len)) + if (seed4 && !EVP_DigestSignUpdate(&ctx,seed4,seed4_len)) goto err; - if (!HMAC_Final(&ctx,A1,&A1_len)) + if (seed5 && !EVP_DigestSignUpdate(&ctx,seed5,seed5_len)) + goto err; + if (!EVP_DigestSignFinal(&ctx,A1,&A1_len)) goto err; for (;;) { - if (!HMAC_Init_ex(&ctx,NULL,0,NULL,NULL)) /* re-init */ + /* Reinit mac contexts */ + if (!EVP_DigestSignInit(&ctx,NULL,md, NULL, mac_key)) goto err; - if (!HMAC_Init_ex(&ctx_tmp,NULL,0,NULL,NULL)) /* re-init */ + if (!EVP_DigestSignInit(&ctx_tmp,NULL,md, NULL, mac_key)) goto err; - if (!HMAC_Update(&ctx,A1,A1_len)) + if (!EVP_DigestSignUpdate(&ctx,A1,A1_len)) goto err; - if (!HMAC_Update(&ctx_tmp,A1,A1_len)) + if (!EVP_DigestSignUpdate(&ctx_tmp,A1,A1_len)) goto err; - if (seed1 != NULL && !HMAC_Update(&ctx,seed1,seed1_len)) + if (seed1 && !EVP_DigestSignUpdate(&ctx,seed1,seed1_len)) goto err; - if (seed2 != NULL && !HMAC_Update(&ctx,seed2,seed2_len)) + if (seed2 && !EVP_DigestSignUpdate(&ctx,seed2,seed2_len)) goto err; - if (seed3 != NULL && !HMAC_Update(&ctx,seed3,seed3_len)) + if (seed3 && !EVP_DigestSignUpdate(&ctx,seed3,seed3_len)) goto err; - if (seed4 != NULL && !HMAC_Update(&ctx,seed4,seed4_len)) + if (seed4 && !EVP_DigestSignUpdate(&ctx,seed4,seed4_len)) goto err; - if (seed5 != NULL && !HMAC_Update(&ctx,seed5,seed5_len)) + if (seed5 && !EVP_DigestSignUpdate(&ctx,seed5,seed5_len)) goto err; if (olen > chunk) { - if (!HMAC_Final(&ctx,out,&j)) + if (!EVP_DigestSignFinal(&ctx,out,&j)) goto err; out+=j; olen-=j; - if (!HMAC_Final(&ctx_tmp,A1,&A1_len)) /* calc the next A1 value */ + /* calc the next A1 value */ + if (!EVP_DigestSignFinal(&ctx_tmp,A1,&A1_len)) goto err; } else /* last one */ { - if (!HMAC_Final(&ctx,A1,&A1_len)) + if (!EVP_DigestSignFinal(&ctx,A1,&A1_len)) goto err; memcpy(out,A1,olen); break; @@ -227,8 +235,9 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, } ret = 1; err: - HMAC_CTX_cleanup(&ctx); - HMAC_CTX_cleanup(&ctx_tmp); + EVP_PKEY_free(mac_key); + EVP_MD_CTX_cleanup(&ctx); + EVP_MD_CTX_cleanup(&ctx_tmp); OPENSSL_cleanse(A1,sizeof(A1)); return ret; } @@ -256,6 +265,8 @@ static int tls1_PRF(long digest_mask, if ((m<s3->tmp.new_cipher->algorithm2, + ret = tls1_PRF(ssl_get_algorithm2(s), TLS_MD_KEY_EXPANSION_CONST,TLS_MD_KEY_EXPANSION_CONST_SIZE, s->s3->server_random,SSL3_RANDOM_SIZE, s->s3->client_random,SSL3_RANDOM_SIZE, @@ -350,7 +361,7 @@ int tls1_change_cipher_state(SSL *s, int which) { int i; for (i=0; is3->tmp.key_block_length; i++) - printf("%02x", key_block[i]); printf("\n"); + printf("%02x", s->s3->tmp.key_block[i]); printf("\n"); } #endif /* KSSL_DEBUG */ @@ -358,7 +369,7 @@ int tls1_change_cipher_state(SSL *s, int which) { if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) s->mac_flags |= SSL_MAC_FLAG_READ_MAC_STREAM; - else + else s->mac_flags &= ~SSL_MAC_FLAG_READ_MAC_STREAM; if (s->enc_read_ctx != NULL) @@ -403,15 +414,20 @@ int tls1_change_cipher_state(SSL *s, int which) s->mac_flags |= SSL_MAC_FLAG_WRITE_MAC_STREAM; else s->mac_flags &= ~SSL_MAC_FLAG_WRITE_MAC_STREAM; - if (s->enc_write_ctx != NULL) + if (s->enc_write_ctx != NULL && !SSL_IS_DTLS(s)) reuse_dd = 1; - else if ((s->enc_write_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))) == NULL) + else if ((s->enc_write_ctx=EVP_CIPHER_CTX_new()) == NULL) goto err; - else - /* make sure it's intialized in case we exit later with an error */ - EVP_CIPHER_CTX_init(s->enc_write_ctx); dd= s->enc_write_ctx; - mac_ctx = ssl_replace_hash(&s->write_hash,NULL); + if (SSL_IS_DTLS(s)) + { + mac_ctx = EVP_MD_CTX_create(); + if (!mac_ctx) + goto err; + s->write_hash = mac_ctx; + } + else + mac_ctx = ssl_replace_hash(&s->write_hash,NULL); #ifndef OPENSSL_NO_COMP if (s->compress != NULL) { @@ -445,7 +461,11 @@ int tls1_change_cipher_state(SSL *s, int which) j=is_export ? (cl < SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher) ? cl : SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) : cl; /* Was j=(exp)?5:EVP_CIPHER_key_length(c); */ - k=EVP_CIPHER_iv_length(c); + /* If GCM mode only part of IV comes from PRF */ + if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) + k = EVP_GCM_TLS_FIXED_IV_LEN; + else + k=EVP_CIPHER_iv_length(c); if ( (which == SSL3_CHANGE_CIPHER_CLIENT_WRITE) || (which == SSL3_CHANGE_CIPHER_SERVER_READ)) { @@ -474,10 +494,14 @@ int tls1_change_cipher_state(SSL *s, int which) } memcpy(mac_secret,ms,i); - mac_key = EVP_PKEY_new_mac_key(mac_type, NULL, - mac_secret,*mac_secret_size); - EVP_DigestSignInit(mac_ctx,NULL,m,NULL,mac_key); - EVP_PKEY_free(mac_key); + + if (!(EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER)) + { + mac_key = EVP_PKEY_new_mac_key(mac_type, NULL, + mac_secret,*mac_secret_size); + EVP_DigestSignInit(mac_ctx,NULL,m,NULL,mac_key); + EVP_PKEY_free(mac_key); + } #ifdef TLS_DEBUG printf("which = %04X\nmac key=",which); { int z; for (z=0; zoptions & SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS)) + if (s->method->version <= TLS1_VERSION && + (s->mode & SSL_MODE_CBC_RECORD_SPLITTING) != 0) { /* enable vulnerability countermeasure for CBC ciphers with * known-IV problem (http://www.openssl.org/~bodo/tls-cbc.txt) */ - s->s3->need_empty_fragments = 1; + s->s3->need_record_splitting = 1; if (s->session->cipher != NULL) { if (s->session->cipher->algorithm_enc == SSL_eNULL) - s->s3->need_empty_fragments = 0; + s->s3->need_record_splitting = 0; #ifndef OPENSSL_NO_RC4 if (s->session->cipher->algorithm_enc == SSL_RC4) - s->s3->need_empty_fragments = 0; + s->s3->need_record_splitting = 0; #endif } } @@ -635,19 +672,28 @@ err: return(ret); } +/* tls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively. + * + * Returns: + * 0: (in non-constant time) if the record is publically invalid (i.e. too + * short etc). + * 1: if the record's padding is valid / the encryption was successful. + * -1: if the record's padding/AEAD-authenticator is invalid or, if sending, + * an internal error occured. + */ int tls1_enc(SSL *s, int send) { SSL3_RECORD *rec; EVP_CIPHER_CTX *ds; unsigned long l; - int bs,i,ii,j,k,n=0; + int bs,i,j,k,pad=0,ret,mac_size=0; const EVP_CIPHER *enc; if (send) { if (EVP_MD_CTX_md(s->write_hash)) { - n=EVP_MD_CTX_size(s->write_hash); + int n=EVP_MD_CTX_size(s->write_hash); OPENSSL_assert(n >= 0); } ds=s->enc_write_ctx; @@ -655,13 +701,34 @@ int tls1_enc(SSL *s, int send) if (s->enc_write_ctx == NULL) enc=NULL; else + { + int ivlen; enc=EVP_CIPHER_CTX_cipher(s->enc_write_ctx); + /* For TLSv1.1 and later explicit IV */ + if (s->version >= TLS1_1_VERSION + && EVP_CIPHER_mode(enc) == EVP_CIPH_CBC_MODE) + ivlen = EVP_CIPHER_iv_length(enc); + else + ivlen = 0; + if (ivlen > 1) + { + if ( rec->data != rec->input) + /* we can't write into the input stream: + * Can this ever happen?? (steve) + */ + fprintf(stderr, + "%s:%d: rec->data != rec->input\n", + __FILE__, __LINE__); + else if (RAND_bytes(rec->input, ivlen) <= 0) + return -1; + } + } } else { if (EVP_MD_CTX_md(s->read_hash)) { - n=EVP_MD_CTX_size(s->read_hash); + int n=EVP_MD_CTX_size(s->read_hash); OPENSSL_assert(n >= 0); } ds=s->enc_read_ctx; @@ -676,18 +743,54 @@ int tls1_enc(SSL *s, int send) printf("tls1_enc(%d)\n", send); #endif /* KSSL_DEBUG */ - if ((s->session == NULL) || (ds == NULL) || - (enc == NULL)) + if ((s->session == NULL) || (ds == NULL) || (enc == NULL)) { memmove(rec->data,rec->input,rec->length); rec->input=rec->data; + ret = 1; } else { l=rec->length; bs=EVP_CIPHER_block_size(ds->cipher); - if ((bs != 1) && send) + if (EVP_CIPHER_flags(ds->cipher)&EVP_CIPH_FLAG_AEAD_CIPHER) + { + unsigned char buf[13],*seq; + + seq = send?s->s3->write_sequence:s->s3->read_sequence; + + if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) + { + unsigned char dtlsseq[9],*p=dtlsseq; + + s2n(send?s->d1->w_epoch:s->d1->r_epoch,p); + memcpy(p,&seq[2],6); + memcpy(buf,dtlsseq,8); + } + else + { + memcpy(buf,seq,8); + for (i=7; i>=0; i--) /* increment */ + { + ++seq[i]; + if (seq[i] != 0) break; + } + } + + buf[8]=rec->type; + buf[9]=(unsigned char)(s->version>>8); + buf[10]=(unsigned char)(s->version); + buf[11]=rec->length>>8; + buf[12]=rec->length&0xff; + pad=EVP_CIPHER_CTX_ctrl(ds,EVP_CTRL_AEAD_TLS1_AAD,13,buf); + if (send) + { + l+=pad; + rec->length+=pad; + } + } + else if ((bs != 1) && send) { i=bs-((int)l%bs); @@ -708,13 +811,13 @@ int tls1_enc(SSL *s, int send) #ifdef KSSL_DEBUG { - unsigned long ui; + unsigned long ui; printf("EVP_Cipher(ds=%p,rec->data=%p,rec->input=%p,l=%ld) ==>\n", - ds,rec->data,rec->input,l); + ds,rec->data,rec->input,l); printf("\tEVP_CIPHER_CTX: %d buf_len, %d key_len [%d %d], %d iv_len\n", - ds->buf_len, ds->cipher->key_len, - DES_KEY_SZ, DES_SCHEDULE_SZ, - ds->cipher->iv_len); + ds->buf_len, ds->cipher->key_len, + DES_KEY_SZ, DES_SCHEDULE_SZ, + ds->cipher->iv_len); printf("\t\tIV: "); for (i=0; icipher->iv_len; i++) printf("%02X", ds->iv[i]); printf("\n"); @@ -727,68 +830,41 @@ int tls1_enc(SSL *s, int send) if (!send) { if (l == 0 || l%bs != 0) - { - SSLerr(SSL_F_TLS1_ENC,SSL_R_BLOCK_CIPHER_PAD_IS_WRONG); - ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_DECRYPTION_FAILED); return 0; - } } - EVP_Cipher(ds,rec->data,rec->input,l); + i = EVP_Cipher(ds,rec->data,rec->input,l); + if ((EVP_CIPHER_flags(ds->cipher)&EVP_CIPH_FLAG_CUSTOM_CIPHER) + ?(i<0) + :(i==0)) + return -1; /* AEAD can fail to verify MAC */ + if (EVP_CIPHER_mode(enc) == EVP_CIPH_GCM_MODE && !send) + { + rec->data += EVP_GCM_TLS_EXPLICIT_IV_LEN; + rec->input += EVP_GCM_TLS_EXPLICIT_IV_LEN; + rec->length -= EVP_GCM_TLS_EXPLICIT_IV_LEN; + } #ifdef KSSL_DEBUG { - unsigned long i; - printf("\trec->data="); + unsigned long i; + printf("\trec->data="); for (i=0; idata[i]); printf("\n"); - } + printf(" %02x", rec->data[i]); printf("\n"); + } #endif /* KSSL_DEBUG */ + ret = 1; + if (EVP_MD_CTX_md(s->read_hash) != NULL) + mac_size = EVP_MD_CTX_size(s->read_hash); if ((bs != 1) && !send) - { - ii=i=rec->data[l-1]; /* padding_length */ - i++; - /* NB: if compression is in operation the first packet - * may not be of even length so the padding bug check - * cannot be performed. This bug workaround has been - * around since SSLeay so hopefully it is either fixed - * now or no buggy implementation supports compression - * [steve] - */ - if ( (s->options&SSL_OP_TLS_BLOCK_PADDING_BUG) - && !s->expand) - { - /* First packet is even in size, so check */ - if ((memcmp(s->s3->read_sequence, - "\0\0\0\0\0\0\0\0",8) == 0) && !(ii & 1)) - s->s3->flags|=TLS1_FLAGS_TLS_PADDING_BUG; - if (s->s3->flags & TLS1_FLAGS_TLS_PADDING_BUG) - i--; - } - /* TLS 1.0 does not bound the number of padding bytes by the block size. - * All of them must have value 'padding_length'. */ - if (i > (int)rec->length) - { - /* Incorrect padding. SSLerr() and ssl3_alert are done - * by caller: we don't want to reveal whether this is - * a decryption error or a MAC verification failure - * (see http://www.openssl.org/~bodo/tls-cbc.txt) */ - return -1; - } - for (j=(int)(l-i); j<(int)l; j++) - { - if (rec->data[j] != ii) - { - /* Incorrect padding */ - return -1; - } - } - rec->length-=i; - } + ret = tls1_cbc_remove_padding(s, rec, bs, mac_size); + if (pad && !send) + rec->length -= pad; } - return(1); + return ret; } + int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *out) { unsigned int ret; @@ -841,26 +917,27 @@ int tls1_final_finish_mac(SSL *s, for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++) { - if (mask & s->s3->tmp.new_cipher->algorithm2) + if (mask & ssl_get_algorithm2(s)) { int hashsize = EVP_MD_size(md); - if (hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) + EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx]; + if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf))) { /* internal error: 'buf' is too small for this cipersuite! */ err = 1; } else { - EVP_MD_CTX_copy_ex(&ctx,s->s3->handshake_dgst[idx]); - EVP_DigestFinal_ex(&ctx,q,&i); - if (i != (unsigned int)hashsize) /* can't really happen */ + if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) || + !EVP_DigestFinal_ex(&ctx,q,&i) || + (i != (unsigned int)hashsize)) err = 1; - q+=i; + q+=hashsize; } } } - if (!tls1_PRF(s->s3->tmp.new_cipher->algorithm2, + if (!tls1_PRF(ssl_get_algorithm2(s), str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0, s->session->master_key,s->session->master_key_length, out,buf2,sizeof buf2)) @@ -878,10 +955,10 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) SSL3_RECORD *rec; unsigned char *seq; EVP_MD_CTX *hash; - size_t md_size; + size_t md_size, orig_len; int i; EVP_MD_CTX hmac, *mac_ctx; - unsigned char buf[5]; + unsigned char header[13]; int stream_mac = (send?(ssl->mac_flags & SSL_MAC_FLAG_WRITE_MAC_STREAM):(ssl->mac_flags&SSL_MAC_FLAG_READ_MAC_STREAM)); int t; @@ -902,12 +979,6 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) OPENSSL_assert(t >= 0); md_size=t; - buf[0]=rec->type; - buf[1]=(unsigned char)(ssl->version>>8); - buf[2]=(unsigned char)(ssl->version); - buf[3]=rec->length>>8; - buf[4]=rec->length&0xff; - /* I should fix this up TLS TLS TLS TLS TLS XXXXXXXX */ if (stream_mac) { @@ -915,7 +986,8 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) } else { - EVP_MD_CTX_copy(&hmac,hash); + if (!EVP_MD_CTX_copy(&hmac,hash)) + return -1; mac_ctx = &hmac; } @@ -926,17 +998,55 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) s2n(send?ssl->d1->w_epoch:ssl->d1->r_epoch, p); memcpy (p,&seq[2],6); - EVP_DigestSignUpdate(mac_ctx,dtlsseq,8); + memcpy(header, dtlsseq, 8); } else - EVP_DigestSignUpdate(mac_ctx,seq,8); + memcpy(header, seq, 8); - EVP_DigestSignUpdate(mac_ctx,buf,5); - EVP_DigestSignUpdate(mac_ctx,rec->input,rec->length); - t=EVP_DigestSignFinal(mac_ctx,md,&md_size); - OPENSSL_assert(t > 0); + /* kludge: tls1_cbc_remove_padding passes padding length in rec->type */ + orig_len = rec->length+md_size+((unsigned int)rec->type>>8); + rec->type &= 0xff; + + header[8]=rec->type; + header[9]=(unsigned char)(ssl->version>>8); + header[10]=(unsigned char)(ssl->version); + header[11]=(rec->length)>>8; + header[12]=(rec->length)&0xff; + + if (!send && + EVP_CIPHER_CTX_mode(ssl->enc_read_ctx) == EVP_CIPH_CBC_MODE && + ssl3_cbc_record_digest_supported(mac_ctx)) + { + /* This is a CBC-encrypted record. We must avoid leaking any + * timing-side channel information about how many blocks of + * data we are hashing because that gives an attacker a + * timing-oracle. */ + ssl3_cbc_digest_record( + mac_ctx, + md, &md_size, + header, rec->input, + rec->length + md_size, orig_len, + ssl->s3->read_mac_secret, + ssl->s3->read_mac_secret_size, + 0 /* not SSLv3 */); + } + else + { + EVP_DigestSignUpdate(mac_ctx,header,sizeof(header)); + EVP_DigestSignUpdate(mac_ctx,rec->input,rec->length); + t=EVP_DigestSignFinal(mac_ctx,md,&md_size); + OPENSSL_assert(t > 0); +#ifdef OPENSSL_FIPS + if (!send && FIPS_mode()) + tls_fips_digest_extra( + ssl->enc_read_ctx, + mac_ctx, rec->input, + rec->length, orig_len); +#endif + } - if (!stream_mac) EVP_MD_CTX_cleanup(&hmac); + if (!stream_mac) + EVP_MD_CTX_cleanup(&hmac); #ifdef TLS_DEBUG printf("sec="); {unsigned int z; for (z=0; zs3->tmp.new_cipher->algorithm2, + tls1_PRF(ssl_get_algorithm2(s), TLS_MD_MASTER_SECRET_CONST,TLS_MD_MASTER_SECRET_CONST_SIZE, s->s3->client_random,SSL3_RANDOM_SIZE, co, col, @@ -994,6 +1105,16 @@ int tls1_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, so, sol, p,len, s->session->master_key,buff,sizeof buff); +#ifdef SSL_DEBUG + fprintf(stderr, "Premaster Secret:\n"); + BIO_dump_fp(stderr, (char *)p, len); + fprintf(stderr, "Client Random:\n"); + BIO_dump_fp(stderr, (char *)s->s3->client_random, SSL3_RANDOM_SIZE); + fprintf(stderr, "Server Random:\n"); + BIO_dump_fp(stderr, (char *)s->s3->server_random, SSL3_RANDOM_SIZE); + fprintf(stderr, "Master Secret:\n"); + BIO_dump_fp(stderr, (char *)s->session->master_key, SSL3_MASTER_SECRET_SIZE); +#endif #ifdef KSSL_DEBUG printf ("tls1_generate_master_secret() complete\n"); @@ -1001,6 +1122,95 @@ int tls1_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, return(SSL3_MASTER_SECRET_SIZE); } +int tls1_export_keying_material(SSL *s, unsigned char *out, size_t olen, + const char *label, size_t llen, const unsigned char *context, + size_t contextlen, int use_context) + { + unsigned char *buff; + unsigned char *val = NULL; + size_t vallen, currentvalpos; + int rv; + +#ifdef KSSL_DEBUG + printf ("tls1_export_keying_material(%p,%p,%d,%s,%d,%p,%d)\n", s, out, olen, label, llen, p, plen); +#endif /* KSSL_DEBUG */ + + buff = OPENSSL_malloc(olen); + if (buff == NULL) goto err2; + + /* construct PRF arguments + * we construct the PRF argument ourself rather than passing separate + * values into the TLS PRF to ensure that the concatenation of values + * does not create a prohibited label. + */ + vallen = llen + SSL3_RANDOM_SIZE * 2; + if (use_context) + { + vallen += 2 + contextlen; + } + + val = OPENSSL_malloc(vallen); + if (val == NULL) goto err2; + currentvalpos = 0; + memcpy(val + currentvalpos, (unsigned char *) label, llen); + currentvalpos += llen; + memcpy(val + currentvalpos, s->s3->client_random, SSL3_RANDOM_SIZE); + currentvalpos += SSL3_RANDOM_SIZE; + memcpy(val + currentvalpos, s->s3->server_random, SSL3_RANDOM_SIZE); + currentvalpos += SSL3_RANDOM_SIZE; + + if (use_context) + { + val[currentvalpos] = (contextlen >> 8) & 0xff; + currentvalpos++; + val[currentvalpos] = contextlen & 0xff; + currentvalpos++; + if ((contextlen > 0) || (context != NULL)) + { + memcpy(val + currentvalpos, context, contextlen); + } + } + + /* disallow prohibited labels + * note that SSL3_RANDOM_SIZE > max(prohibited label len) = + * 15, so size of val > max(prohibited label len) = 15 and the + * comparisons won't have buffer overflow + */ + if (memcmp(val, TLS_MD_CLIENT_FINISH_CONST, + TLS_MD_CLIENT_FINISH_CONST_SIZE) == 0) goto err1; + if (memcmp(val, TLS_MD_SERVER_FINISH_CONST, + TLS_MD_SERVER_FINISH_CONST_SIZE) == 0) goto err1; + if (memcmp(val, TLS_MD_MASTER_SECRET_CONST, + TLS_MD_MASTER_SECRET_CONST_SIZE) == 0) goto err1; + if (memcmp(val, TLS_MD_KEY_EXPANSION_CONST, + TLS_MD_KEY_EXPANSION_CONST_SIZE) == 0) goto err1; + + rv = tls1_PRF(s->s3->tmp.new_cipher->algorithm2, + val, vallen, + NULL, 0, + NULL, 0, + NULL, 0, + NULL, 0, + s->session->master_key,s->session->master_key_length, + out,buff,olen); + +#ifdef KSSL_DEBUG + printf ("tls1_export_keying_material() complete\n"); +#endif /* KSSL_DEBUG */ + goto ret; +err1: + SSLerr(SSL_F_TLS1_EXPORT_KEYING_MATERIAL, SSL_R_TLS_ILLEGAL_EXPORTER_LABEL); + rv = 0; + goto ret; +err2: + SSLerr(SSL_F_TLS1_EXPORT_KEYING_MATERIAL, ERR_R_MALLOC_FAILURE); + rv = 0; +ret: + if (buff != NULL) OPENSSL_free(buff); + if (val != NULL) OPENSSL_free(val); + return(rv); + } + int tls1_alert_code(int code) { switch (code) @@ -1042,4 +1252,3 @@ int tls1_alert_code(int code) default: return(-1); } } - diff --git a/app/openssl/ssl/t1_lib.c b/app/openssl/ssl/t1_lib.c index bbab4675..369e09f4 100644 --- a/app/openssl/ssl/t1_lib.c +++ b/app/openssl/ssl/t1_lib.c @@ -114,6 +114,7 @@ #include #include #include +#include #include "ssl_locl.h" const char tls1_version_str[]="TLSv1" OPENSSL_VERSION_PTEXT; @@ -136,6 +137,7 @@ SSL3_ENC_METHOD TLSv1_enc_data={ TLS_MD_CLIENT_FINISH_CONST,TLS_MD_CLIENT_FINISH_CONST_SIZE, TLS_MD_SERVER_FINISH_CONST,TLS_MD_SERVER_FINISH_CONST_SIZE, tls1_alert_code, + tls1_export_keying_material, }; long tls1_default_timeout(void) @@ -166,10 +168,11 @@ void tls1_free(SSL *s) void tls1_clear(SSL *s) { ssl3_clear(s); - s->version=TLS1_VERSION; + s->version = s->method->version; } #ifndef OPENSSL_NO_EC + static int nid_list[] = { NID_sect163k1, /* sect163k1 (1) */ @@ -198,7 +201,36 @@ static int nid_list[] = NID_secp384r1, /* secp384r1 (24) */ NID_secp521r1 /* secp521r1 (25) */ }; - + +static int pref_list[] = + { + NID_sect571r1, /* sect571r1 (14) */ + NID_sect571k1, /* sect571k1 (13) */ + NID_secp521r1, /* secp521r1 (25) */ + NID_sect409k1, /* sect409k1 (11) */ + NID_sect409r1, /* sect409r1 (12) */ + NID_secp384r1, /* secp384r1 (24) */ + NID_sect283k1, /* sect283k1 (9) */ + NID_sect283r1, /* sect283r1 (10) */ + NID_secp256k1, /* secp256k1 (22) */ + NID_X9_62_prime256v1, /* secp256r1 (23) */ + NID_sect239k1, /* sect239k1 (8) */ + NID_sect233k1, /* sect233k1 (6) */ + NID_sect233r1, /* sect233r1 (7) */ + NID_secp224k1, /* secp224k1 (20) */ + NID_secp224r1, /* secp224r1 (21) */ + NID_sect193r1, /* sect193r1 (4) */ + NID_sect193r2, /* sect193r2 (5) */ + NID_secp192k1, /* secp192k1 (18) */ + NID_X9_62_prime192v1, /* secp192r1 (19) */ + NID_sect163k1, /* sect163k1 (1) */ + NID_sect163r1, /* sect163r1 (2) */ + NID_sect163r2, /* sect163r2 (3) */ + NID_secp160k1, /* secp160k1 (15) */ + NID_secp160r1, /* secp160r1 (16) */ + NID_secp160r2, /* secp160r2 (17) */ + }; + int tls1_ec_curve_id2nid(int curve_id) { /* ECC curves from draft-ietf-tls-ecc-12.txt (Oct. 17, 2005) */ @@ -270,6 +302,56 @@ int tls1_ec_nid2curve_id(int nid) #endif /* OPENSSL_NO_EC */ #ifndef OPENSSL_NO_TLSEXT + +/* List of supported signature algorithms and hashes. Should make this + * customisable at some point, for now include everything we support. + */ + +#ifdef OPENSSL_NO_RSA +#define tlsext_sigalg_rsa(md) /* */ +#else +#define tlsext_sigalg_rsa(md) md, TLSEXT_signature_rsa, +#endif + +#ifdef OPENSSL_NO_DSA +#define tlsext_sigalg_dsa(md) /* */ +#else +#define tlsext_sigalg_dsa(md) md, TLSEXT_signature_dsa, +#endif + +#ifdef OPENSSL_NO_ECDSA +#define tlsext_sigalg_ecdsa(md) /* */ +#else +#define tlsext_sigalg_ecdsa(md) md, TLSEXT_signature_ecdsa, +#endif + +#define tlsext_sigalg(md) \ + tlsext_sigalg_rsa(md) \ + tlsext_sigalg_dsa(md) \ + tlsext_sigalg_ecdsa(md) + +static unsigned char tls12_sigalgs[] = { +#ifndef OPENSSL_NO_SHA512 + tlsext_sigalg(TLSEXT_hash_sha512) + tlsext_sigalg(TLSEXT_hash_sha384) +#endif +#ifndef OPENSSL_NO_SHA256 + tlsext_sigalg(TLSEXT_hash_sha256) + tlsext_sigalg(TLSEXT_hash_sha224) +#endif +#ifndef OPENSSL_NO_SHA + tlsext_sigalg(TLSEXT_hash_sha1) +#endif +}; + +int tls12_get_req_sig_algs(SSL *s, unsigned char *p) + { + size_t slen = sizeof(tls12_sigalgs); + if (p) + memcpy(p, tls12_sigalgs, slen); + return (int)slen; + } + unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned char *limit) { int extdatalen=0; @@ -317,7 +399,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha } /* Add RI if renegotiating */ - if (s->new_session) + if (s->renegotiate) { int el; @@ -341,6 +423,34 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha ret += el; } +#ifndef OPENSSL_NO_SRP + /* Add SRP username if there is one */ + if (s->srp_ctx.login != NULL) + { /* Add TLS extension SRP username to the Client Hello message */ + + int login_len = strlen(s->srp_ctx.login); + if (login_len > 255 || login_len == 0) + { + SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); + return NULL; + } + + /* check for enough space. + 4 for the srp type type and entension length + 1 for the srp user identity + + srp user identity length + */ + if ((limit - ret - 5 - login_len) < 0) return NULL; + + /* fill in the extension */ + s2n(TLSEXT_TYPE_srp,ret); + s2n(login_len+1,ret); + (*ret++) = (unsigned char) login_len; + memcpy(ret, s->srp_ctx.login, login_len); + ret+=login_len; + } +#endif + #ifndef OPENSSL_NO_EC if (s->tlsext_ecpointformatlist != NULL && s->version != DTLS1_VERSION) @@ -426,6 +536,17 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha } skip_ext: + if (TLS1_get_client_version(s) >= TLS1_2_VERSION) + { + if ((size_t)(limit - ret) < sizeof(tls12_sigalgs) + 6) + return NULL; + s2n(TLSEXT_TYPE_signature_algorithms,ret); + s2n(sizeof(tls12_sigalgs) + 2, ret); + s2n(sizeof(tls12_sigalgs), ret); + memcpy(ret, tls12_sigalgs, sizeof(tls12_sigalgs)); + ret += sizeof(tls12_sigalgs); + } + #ifdef TLSEXT_TYPE_opaque_prf_input if (s->s3->client_opaque_prf_input != NULL && s->version != DTLS1_VERSION) @@ -494,6 +615,20 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha i2d_X509_EXTENSIONS(s->tlsext_ocsp_exts, &ret); } +#ifndef OPENSSL_NO_HEARTBEATS + /* Add Heartbeat extension */ + s2n(TLSEXT_TYPE_heartbeat,ret); + s2n(1,ret); + /* Set mode: + * 1: peer may send requests + * 2: peer not allowed to send requests + */ + if (s->tlsext_heartbeat & SSL_TLSEXT_HB_DONT_RECV_REQUESTS) + *(ret++) = SSL_TLSEXT_HB_DONT_SEND_REQUESTS; + else + *(ret++) = SSL_TLSEXT_HB_ENABLED; +#endif + #ifndef OPENSSL_NO_NEXTPROTONEG if (s->ctx->next_proto_select_cb && !s->s3->tmp.finish_md_len) { @@ -506,6 +641,79 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha } #endif + if (s->tlsext_channel_id_enabled) + { + /* The client advertises an emtpy extension to indicate its + * support for Channel ID. */ + if (limit - ret - 4 < 0) + return NULL; + s2n(TLSEXT_TYPE_channel_id,ret); + s2n(0,ret); + } + + if (s->alpn_client_proto_list && !s->s3->tmp.finish_md_len) + { + if ((size_t)(limit - ret) < 6 + s->alpn_client_proto_list_len) + return NULL; + s2n(TLSEXT_TYPE_application_layer_protocol_negotiation,ret); + s2n(2 + s->alpn_client_proto_list_len,ret); + s2n(s->alpn_client_proto_list_len,ret); + memcpy(ret, s->alpn_client_proto_list, + s->alpn_client_proto_list_len); + ret += s->alpn_client_proto_list_len; + } + +#ifndef OPENSSL_NO_SRTP + if(SSL_get_srtp_profiles(s)) + { + int el; + + ssl_add_clienthello_use_srtp_ext(s, 0, &el, 0); + + if((limit - p - 4 - el) < 0) return NULL; + + s2n(TLSEXT_TYPE_use_srtp,ret); + s2n(el,ret); + + if(ssl_add_clienthello_use_srtp_ext(s, ret, &el, el)) + { + SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); + return NULL; + } + ret += el; + } +#endif + +#ifdef TLSEXT_TYPE_padding + /* Add padding to workaround bugs in F5 terminators. + * See https://tools.ietf.org/html/draft-agl-tls-padding-03 + * + * NB: because this code works out the length of all existing + * extensions it MUST always appear last. + */ + { + int hlen = ret - (unsigned char *)s->init_buf->data; + /* The code in s23_clnt.c to build ClientHello messages includes the + * 5-byte record header in the buffer, while the code in s3_clnt.c does + * not. */ + if (s->state == SSL23_ST_CW_CLNT_HELLO_A) + hlen -= 5; + if (hlen > 0xff && hlen < 0x200) + { + hlen = 0x200 - hlen; + if (hlen >= 4) + hlen -= 4; + else + hlen = 0; + + s2n(TLSEXT_TYPE_padding, ret); + s2n(hlen, ret); + memset(ret, 0, hlen); + ret += hlen; + } + } +#endif + if ((extdatalen = ret-p-2)== 0) return p; @@ -618,6 +826,28 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha ret += sol; } #endif + +#ifndef OPENSSL_NO_SRTP + if(s->srtp_profile) + { + int el; + + ssl_add_serverhello_use_srtp_ext(s, 0, &el, 0); + + if((limit - p - 4 - el) < 0) return NULL; + + s2n(TLSEXT_TYPE_use_srtp,ret); + s2n(el,ret); + + if(ssl_add_serverhello_use_srtp_ext(s, ret, &el, el)) + { + SSLerr(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); + return NULL; + } + ret+=el; + } +#endif + if (((s->s3->tmp.new_cipher->id & 0xFFFF)==0x80 || (s->s3->tmp.new_cipher->id & 0xFFFF)==0x81) && (SSL_get_options(s) & SSL_OP_CRYPTOPRO_TLSEXT_BUG)) { const unsigned char cryptopro_ext[36] = { @@ -633,6 +863,24 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha } +#ifndef OPENSSL_NO_HEARTBEATS + /* Add Heartbeat extension if we've received one */ + if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) + { + s2n(TLSEXT_TYPE_heartbeat,ret); + s2n(1,ret); + /* Set mode: + * 1: peer may send requests + * 2: peer not allowed to send requests + */ + if (s->tlsext_heartbeat & SSL_TLSEXT_HB_DONT_RECV_REQUESTS) + *(ret++) = SSL_TLSEXT_HB_DONT_SEND_REQUESTS; + else + *(ret++) = SSL_TLSEXT_HB_ENABLED; + + } +#endif + #ifndef OPENSSL_NO_NEXTPROTONEG next_proto_neg_seen = s->s3->next_proto_neg_seen; s->s3->next_proto_neg_seen = 0; @@ -655,6 +903,31 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha } #endif + /* If the client advertised support for Channel ID, and we have it + * enabled, then we want to echo it back. */ + if (s->s3->tlsext_channel_id_valid) + { + if (limit - ret - 4 < 0) + return NULL; + s2n(TLSEXT_TYPE_channel_id,ret); + s2n(0,ret); + } + + if (s->s3->alpn_selected) + { + const unsigned char *selected = s->s3->alpn_selected; + unsigned len = s->s3->alpn_selected_len; + + if ((long)(limit - ret - 4 - 2 - 1 - len) < 0) + return NULL; + s2n(TLSEXT_TYPE_application_layer_protocol_negotiation,ret); + s2n(3 + len,ret); + s2n(1 + len,ret); + *ret++ = len; + memcpy(ret, selected, len); + ret += len; + } + if ((extdatalen = ret-p-2)== 0) return p; @@ -662,6 +935,159 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha return ret; } +#ifndef OPENSSL_NO_EC +/* ssl_check_for_safari attempts to fingerprint Safari using OS X + * SecureTransport using the TLS extension block in |d|, of length |n|. + * Safari, since 10.6, sends exactly these extensions, in this order: + * SNI, + * elliptic_curves + * ec_point_formats + * + * We wish to fingerprint Safari because they broke ECDHE-ECDSA support in 10.8, + * but they advertise support. So enabling ECDHE-ECDSA ciphers breaks them. + * Sadly we cannot differentiate 10.6, 10.7 and 10.8.4 (which work), from + * 10.8..10.8.3 (which don't work). + */ +static void ssl_check_for_safari(SSL *s, const unsigned char *data, const unsigned char *d, int n) { + unsigned short type, size; + static const unsigned char kSafariExtensionsBlock[] = { + 0x00, 0x0a, /* elliptic_curves extension */ + 0x00, 0x08, /* 8 bytes */ + 0x00, 0x06, /* 6 bytes of curve ids */ + 0x00, 0x17, /* P-256 */ + 0x00, 0x18, /* P-384 */ + 0x00, 0x19, /* P-521 */ + + 0x00, 0x0b, /* ec_point_formats */ + 0x00, 0x02, /* 2 bytes */ + 0x01, /* 1 point format */ + 0x00, /* uncompressed */ + }; + + /* The following is only present in TLS 1.2 */ + static const unsigned char kSafariTLS12ExtensionsBlock[] = { + 0x00, 0x0d, /* signature_algorithms */ + 0x00, 0x0c, /* 12 bytes */ + 0x00, 0x0a, /* 10 bytes */ + 0x05, 0x01, /* SHA-384/RSA */ + 0x04, 0x01, /* SHA-256/RSA */ + 0x02, 0x01, /* SHA-1/RSA */ + 0x04, 0x03, /* SHA-256/ECDSA */ + 0x02, 0x03, /* SHA-1/ECDSA */ + }; + + if (data >= (d+n-2)) + return; + data += 2; + + if (data > (d+n-4)) + return; + n2s(data,type); + n2s(data,size); + + if (type != TLSEXT_TYPE_server_name) + return; + + if (data+size > d+n) + return; + data += size; + + if (TLS1_get_client_version(s) >= TLS1_2_VERSION) + { + const size_t len1 = sizeof(kSafariExtensionsBlock); + const size_t len2 = sizeof(kSafariTLS12ExtensionsBlock); + + if (data + len1 + len2 != d+n) + return; + if (memcmp(data, kSafariExtensionsBlock, len1) != 0) + return; + if (memcmp(data + len1, kSafariTLS12ExtensionsBlock, len2) != 0) + return; + } + else + { + const size_t len = sizeof(kSafariExtensionsBlock); + + if (data + len != d+n) + return; + if (memcmp(data, kSafariExtensionsBlock, len) != 0) + return; + } + + s->s3->is_probably_safari = 1; +} +#endif /* !OPENSSL_NO_EC */ + +/* tls1_alpn_handle_client_hello is called to process the ALPN extension in a + * ClientHello. + * data: the contents of the extension, not including the type and length. + * data_len: the number of bytes in |data| + * al: a pointer to the alert value to send in the event of a non-zero + * return. + * + * returns: 0 on success. */ +static int tls1_alpn_handle_client_hello(SSL *s, const unsigned char *data, + unsigned data_len, int *al) + { + unsigned i; + unsigned proto_len; + const unsigned char *selected; + unsigned char selected_len; + int r; + + if (s->ctx->alpn_select_cb == NULL) + return 0; + + if (data_len < 2) + goto parse_error; + + /* data should contain a uint16 length followed by a series of 8-bit, + * length-prefixed strings. */ + i = ((unsigned) data[0]) << 8 | + ((unsigned) data[1]); + data_len -= 2; + data += 2; + if (data_len != i) + goto parse_error; + + if (data_len < 2) + goto parse_error; + + for (i = 0; i < data_len;) + { + proto_len = data[i]; + i++; + + if (proto_len == 0) + goto parse_error; + + if (i + proto_len < i || i + proto_len > data_len) + goto parse_error; + + i += proto_len; + } + + r = s->ctx->alpn_select_cb(s, &selected, &selected_len, data, data_len, + s->ctx->alpn_select_cb_arg); + if (r == SSL_TLSEXT_ERR_OK) { + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = OPENSSL_malloc(selected_len); + if (!s->s3->alpn_selected) + { + *al = SSL_AD_INTERNAL_ERROR; + return -1; + } + memcpy(s->s3->alpn_selected, selected, selected_len); + s->s3->alpn_selected_len = selected_len; + } + return 0; + +parse_error: + *al = SSL_AD_DECODE_ERROR; + return -1; + } + int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, int n, int *al) { unsigned short type; @@ -669,10 +1095,37 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in unsigned short len; unsigned char *data = *p; int renegotiate_seen = 0; + int sigalg_seen = 0; s->servername_done = 0; s->tlsext_status_type = -1; + /* Reset TLS 1.2 digest functions to defaults because they don't carry + * over to a renegotiation. */ + s->s3->digest_rsa = NULL; + s->s3->digest_dsa = NULL; + s->s3->digest_ecdsa = NULL; + +#ifndef OPENSSL_NO_NEXTPROTONEG + s->s3->next_proto_neg_seen = 0; +#endif + + if (s->s3->alpn_selected) + { + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } + +#ifndef OPENSSL_NO_HEARTBEATS + s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED | + SSL_TLSEXT_HB_DONT_SEND_REQUESTS); +#endif + +#ifndef OPENSSL_NO_EC + if (s->options & SSL_OP_SAFARI_ECDHE_ECDSA_BUG) + ssl_check_for_safari(s, data, d, n); +#endif /* !OPENSSL_NO_EC */ + if (data >= (d+n-2)) goto ri_check; n2s(data,len); @@ -799,6 +1252,31 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in } } +#ifndef OPENSSL_NO_SRP + else if (type == TLSEXT_TYPE_srp) + { + if (size <= 0 || ((len = data[0])) != (size -1)) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + if (s->srp_ctx.login != NULL) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + if ((s->srp_ctx.login = OPENSSL_malloc(len+1)) == NULL) + return -1; + memcpy(s->srp_ctx.login, &data[1], len); + s->srp_ctx.login[len]='\0'; + + if (strlen(s->srp_ctx.login) != len) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + } +#endif #ifndef OPENSSL_NO_EC else if (type == TLSEXT_TYPE_ec_point_formats && @@ -843,7 +1321,8 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in int ellipticcurvelist_length = (*(sdata++) << 8); ellipticcurvelist_length += (*(sdata++)); - if (ellipticcurvelist_length != size - 2) + if (ellipticcurvelist_length != size - 2 || + ellipticcurvelist_length < 1) { *al = TLS1_AD_DECODE_ERROR; return 0; @@ -919,8 +1398,26 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in return 0; renegotiate_seen = 1; } + else if (type == TLSEXT_TYPE_signature_algorithms) + { + int dsize; + if (sigalg_seen || size < 2) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + sigalg_seen = 1; + n2s(data,dsize); + size -= 2; + if (dsize != size || dsize & 1) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + tls1_process_sigalgs(s, data, dsize); + } else if (type == TLSEXT_TYPE_status_request && - s->version != DTLS1_VERSION && s->ctx->tlsext_status_cb) + s->version != DTLS1_VERSION) { if (size < 5) @@ -1008,6 +1505,12 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in sdata = data; if (dsize > 0) { + if (s->tlsext_ocsp_exts) + { + sk_X509_EXTENSION_pop_free(s->tlsext_ocsp_exts, + X509_EXTENSION_free); + } + s->tlsext_ocsp_exts = d2i_X509_EXTENSIONS(NULL, &sdata, dsize); @@ -1025,9 +1528,27 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in else s->tlsext_status_type = -1; } +#ifndef OPENSSL_NO_HEARTBEATS + else if (type == TLSEXT_TYPE_heartbeat) + { + switch(data[0]) + { + case 0x01: /* Client allows us to send HB requests */ + s->tlsext_heartbeat |= SSL_TLSEXT_HB_ENABLED; + break; + case 0x02: /* Client doesn't accept HB requests */ + s->tlsext_heartbeat |= SSL_TLSEXT_HB_ENABLED; + s->tlsext_heartbeat |= SSL_TLSEXT_HB_DONT_SEND_REQUESTS; + break; + default: *al = SSL_AD_ILLEGAL_PARAMETER; + return 0; + } + } +#endif #ifndef OPENSSL_NO_NEXTPROTONEG else if (type == TLSEXT_TYPE_next_proto_neg && - s->s3->tmp.finish_md_len == 0) + s->s3->tmp.finish_md_len == 0 && + s->s3->alpn_selected == NULL) { /* We shouldn't accept this extension on a * renegotiation. @@ -1048,7 +1569,29 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in } #endif + else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled) + s->s3->tlsext_channel_id_valid = 1; + + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation && + s->ctx->alpn_select_cb && + s->s3->tmp.finish_md_len == 0) + { + if (tls1_alpn_handle_client_hello(s, data, size, al) != 0) + return 0; + /* ALPN takes precedence over NPN. */ + s->s3->next_proto_neg_seen = 0; + } + /* session ticket processed earlier */ +#ifndef OPENSSL_NO_SRTP + else if (type == TLSEXT_TYPE_use_srtp) + { + if(ssl_parse_clienthello_use_srtp_ext(s, data, size, + al)) + return 0; + } +#endif + data+=size; } @@ -1058,7 +1601,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in /* Need RI if renegotiating */ - if (!renegotiate_seen && s->new_session && + if (!renegotiate_seen && s->renegotiate && !(s->options & SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)) { *al = SSL_AD_HANDSHAKE_FAILURE; @@ -1074,7 +1617,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in /* ssl_next_proto_validate validates a Next Protocol Negotiation block. No * elements of zero length are allowed and the set of elements must exactly fill * the length of the block. */ -static int ssl_next_proto_validate(unsigned char *d, unsigned len) +static char ssl_next_proto_validate(unsigned char *d, unsigned len) { unsigned int off = 0; @@ -1099,6 +1642,21 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in int tlsext_servername = 0; int renegotiate_seen = 0; +#ifndef OPENSSL_NO_NEXTPROTONEG + s->s3->next_proto_neg_seen = 0; +#endif + + if (s->s3->alpn_selected) + { + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } + +#ifndef OPENSSL_NO_HEARTBEATS + s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED | + SSL_TLSEXT_HB_DONT_SEND_REQUESTS); +#endif + if (data >= (d+n-2)) goto ri_check; @@ -1138,7 +1696,8 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in unsigned char *sdata = data; int ecpointformatlist_length = *(sdata++); - if (ecpointformatlist_length != size - 1) + if (ecpointformatlist_length != size - 1 || + ecpointformatlist_length < 1) { *al = TLS1_AD_DECODE_ERROR; return 0; @@ -1225,13 +1784,14 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in s->tlsext_status_expected = 1; } #ifndef OPENSSL_NO_NEXTPROTONEG - else if (type == TLSEXT_TYPE_next_proto_neg) + else if (type == TLSEXT_TYPE_next_proto_neg && + s->s3->tmp.finish_md_len == 0) { unsigned char *selected; unsigned char selected_len; /* We must have requested it. */ - if ((s->ctx->next_proto_select_cb == NULL)) + if (s->ctx->next_proto_select_cb == NULL) { *al = TLS1_AD_UNSUPPORTED_EXTENSION; return 0; @@ -1255,31 +1815,106 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in } memcpy(s->next_proto_negotiated, selected, selected_len); s->next_proto_negotiated_len = selected_len; + s->s3->next_proto_neg_seen = 1; } #endif - else if (type == TLSEXT_TYPE_renegotiate) - { - if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al)) - return 0; - renegotiate_seen = 1; - } - data+=size; - } - - if (data != d+n) - { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + else if (type == TLSEXT_TYPE_channel_id) + s->s3->tlsext_channel_id_valid = 1; - if (!s->hit && tlsext_servername == 1) - { - if (s->tlsext_hostname) + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation) { - if (s->session->tlsext_hostname == NULL) + unsigned len; + + /* We must have requested it. */ + if (s->alpn_client_proto_list == NULL) { - s->session->tlsext_hostname = BUF_strdup(s->tlsext_hostname); - if (!s->session->tlsext_hostname) + *al = TLS1_AD_UNSUPPORTED_EXTENSION; + return 0; + } + if (size < 4) + { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + /* The extension data consists of: + * uint16 list_length + * uint8 proto_length; + * uint8 proto[proto_length]; */ + len = data[0]; + len <<= 8; + len |= data[1]; + if (len != (unsigned) size - 2) + { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + len = data[2]; + if (len != (unsigned) size - 3) + { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = OPENSSL_malloc(len); + if (!s->s3->alpn_selected) + { + *al = TLS1_AD_INTERNAL_ERROR; + return 0; + } + memcpy(s->s3->alpn_selected, data + 3, len); + s->s3->alpn_selected_len = len; + } + + else if (type == TLSEXT_TYPE_renegotiate) + { + if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al)) + return 0; + renegotiate_seen = 1; + } +#ifndef OPENSSL_NO_HEARTBEATS + else if (type == TLSEXT_TYPE_heartbeat) + { + switch(data[0]) + { + case 0x01: /* Server allows us to send HB requests */ + s->tlsext_heartbeat |= SSL_TLSEXT_HB_ENABLED; + break; + case 0x02: /* Server doesn't accept HB requests */ + s->tlsext_heartbeat |= SSL_TLSEXT_HB_ENABLED; + s->tlsext_heartbeat |= SSL_TLSEXT_HB_DONT_SEND_REQUESTS; + break; + default: *al = SSL_AD_ILLEGAL_PARAMETER; + return 0; + } + } +#endif +#ifndef OPENSSL_NO_SRTP + else if (type == TLSEXT_TYPE_use_srtp) + { + if(ssl_parse_serverhello_use_srtp_ext(s, data, size, + al)) + return 0; + } +#endif + + data+=size; + } + + if (data != d+n) + { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + + if (!s->hit && tlsext_servername == 1) + { + if (s->tlsext_hostname) + { + if (s->session->tlsext_hostname == NULL) + { + s->session->tlsext_hostname = BUF_strdup(s->tlsext_hostname); + if (!s->session->tlsext_hostname) { *al = SSL_AD_UNRECOGNIZED_NAME; return 0; @@ -1342,7 +1977,7 @@ int ssl_prepare_clienthello_tlsext(SSL *s) break; } } - using_ecc = using_ecc && (s->version == TLS1_VERSION); + using_ecc = using_ecc && (s->version >= TLS1_VERSION); if (using_ecc) { if (s->tlsext_ecpointformatlist != NULL) OPENSSL_free(s->tlsext_ecpointformatlist); @@ -1358,16 +1993,19 @@ int ssl_prepare_clienthello_tlsext(SSL *s) /* we support all named elliptic curves in draft-ietf-tls-ecc-12 */ if (s->tlsext_ellipticcurvelist != NULL) OPENSSL_free(s->tlsext_ellipticcurvelist); - s->tlsext_ellipticcurvelist_length = sizeof(nid_list)/sizeof(nid_list[0]) * 2; + s->tlsext_ellipticcurvelist_length = sizeof(pref_list)/sizeof(pref_list[0]) * 2; if ((s->tlsext_ellipticcurvelist = OPENSSL_malloc(s->tlsext_ellipticcurvelist_length)) == NULL) { s->tlsext_ellipticcurvelist_length = 0; SSLerr(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT,ERR_R_MALLOC_FAILURE); return -1; } - for (i = 1, j = s->tlsext_ellipticcurvelist; (unsigned int)i <= - sizeof(nid_list)/sizeof(nid_list[0]); i++) - s2n(i,j); + for (i = 0, j = s->tlsext_ellipticcurvelist; (unsigned int)i < + sizeof(pref_list)/sizeof(pref_list[0]); i++) + { + int id = tls1_ec_nid2curve_id(pref_list[i]); + s2n(id,j); + } } #endif /* OPENSSL_NO_EC */ @@ -1439,7 +2077,7 @@ int ssl_prepare_serverhello_tlsext(SSL *s) return 1; } -int ssl_check_clienthello_tlsext(SSL *s) +int ssl_check_clienthello_tlsext_early(SSL *s) { int ret=SSL_TLSEXT_ERR_NOACK; int al = SSL_AD_UNRECOGNIZED_NAME; @@ -1458,42 +2096,12 @@ int ssl_check_clienthello_tlsext(SSL *s) else if (s->initial_ctx != NULL && s->initial_ctx->tlsext_servername_callback != 0) ret = s->initial_ctx->tlsext_servername_callback(s, &al, s->initial_ctx->tlsext_servername_arg); - /* If status request then ask callback what to do. - * Note: this must be called after servername callbacks in case - * the certificate has changed. - */ - if ((s->tlsext_status_type != -1) && s->ctx && s->ctx->tlsext_status_cb) - { - int r; - r = s->ctx->tlsext_status_cb(s, s->ctx->tlsext_status_arg); - switch (r) - { - /* We don't want to send a status request response */ - case SSL_TLSEXT_ERR_NOACK: - s->tlsext_status_expected = 0; - break; - /* status request response should be sent */ - case SSL_TLSEXT_ERR_OK: - if (s->tlsext_ocsp_resp) - s->tlsext_status_expected = 1; - else - s->tlsext_status_expected = 0; - break; - /* something bad happened */ - case SSL_TLSEXT_ERR_ALERT_FATAL: - ret = SSL_TLSEXT_ERR_ALERT_FATAL; - al = SSL_AD_INTERNAL_ERROR; - goto err; - } - } - else - s->tlsext_status_expected = 0; - #ifdef TLSEXT_TYPE_opaque_prf_input { /* This sort of belongs into ssl_prepare_serverhello_tlsext(), * but we might be sending an alert in response to the client hello, - * so this has to happen here in ssl_check_clienthello_tlsext(). */ + * so this has to happen here in + * ssl_check_clienthello_tlsext_early(). */ int r = 1; @@ -1545,8 +2153,8 @@ int ssl_check_clienthello_tlsext(SSL *s) } } -#endif err: +#endif switch (ret) { case SSL_TLSEXT_ERR_ALERT_FATAL: @@ -1564,6 +2172,71 @@ int ssl_check_clienthello_tlsext(SSL *s) } } +int ssl_check_clienthello_tlsext_late(SSL *s) + { + int ret = SSL_TLSEXT_ERR_OK; + int al; + + /* If status request then ask callback what to do. + * Note: this must be called after servername callbacks in case + * the certificate has changed, and must be called after the cipher + * has been chosen because this may influence which certificate is sent + */ + if ((s->tlsext_status_type != -1) && s->ctx && s->ctx->tlsext_status_cb) + { + int r; + CERT_PKEY *certpkey; + certpkey = ssl_get_server_send_pkey(s); + /* If no certificate can't return certificate status */ + if (certpkey == NULL) + { + s->tlsext_status_expected = 0; + return 1; + } + /* Set current certificate to one we will use so + * SSL_get_certificate et al can pick it up. + */ + s->cert->key = certpkey; + r = s->ctx->tlsext_status_cb(s, s->ctx->tlsext_status_arg); + switch (r) + { + /* We don't want to send a status request response */ + case SSL_TLSEXT_ERR_NOACK: + s->tlsext_status_expected = 0; + break; + /* status request response should be sent */ + case SSL_TLSEXT_ERR_OK: + if (s->tlsext_ocsp_resp) + s->tlsext_status_expected = 1; + else + s->tlsext_status_expected = 0; + break; + /* something bad happened */ + case SSL_TLSEXT_ERR_ALERT_FATAL: + ret = SSL_TLSEXT_ERR_ALERT_FATAL; + al = SSL_AD_INTERNAL_ERROR; + goto err; + } + } + else + s->tlsext_status_expected = 0; + + err: + switch (ret) + { + case SSL_TLSEXT_ERR_ALERT_FATAL: + ssl3_send_alert(s,SSL3_AL_FATAL,al); + return -1; + + case SSL_TLSEXT_ERR_ALERT_WARNING: + ssl3_send_alert(s,SSL3_AL_WARNING,al); + return 1; + + default: + return 1; + } + } + int ssl_check_serverhello_tlsext(SSL *s) { int ret=SSL_TLSEXT_ERR_NOACK; @@ -1676,26 +2349,56 @@ int ssl_check_serverhello_tlsext(SSL *s) } } -/* Since the server cache lookup is done early on in the processing of client - * hello and other operations depend on the result we need to handle any TLS - * session ticket extension at the same time. +/* Since the server cache lookup is done early on in the processing of the + * ClientHello, and other operations depend on the result, we need to handle + * any TLS session ticket extension at the same time. + * + * session_id: points at the session ID in the ClientHello. This code will + * read past the end of this in order to parse out the session ticket + * extension, if any. + * len: the length of the session ID. + * limit: a pointer to the first byte after the ClientHello. + * ret: (output) on return, if a ticket was decrypted, then this is set to + * point to the resulting session. + * + * If s->tls_session_secret_cb is set then we are expecting a pre-shared key + * ciphersuite, in which case we have no use for session tickets and one will + * never be decrypted, nor will s->tlsext_ticket_expected be set to 1. + * + * Returns: + * -1: fatal error, either from parsing or decrypting the ticket. + * 0: no ticket was found (or was ignored, based on settings). + * 1: a zero length extension was found, indicating that the client supports + * session tickets but doesn't currently have one to offer. + * 2: either s->tls_session_secret_cb was set, or a ticket was offered but + * couldn't be decrypted because of a non-fatal error. + * 3: a ticket was successfully decrypted and *ret was set. + * + * Side effects: + * Sets s->tlsext_ticket_expected to 1 if the server will have to issue + * a new session ticket to the client because the client indicated support + * (and s->tls_session_secret_cb is NULL) but the client either doesn't have + * a session ticket or we couldn't use the one it gave us, or if + * s->ctx->tlsext_ticket_key_cb asked to renew the client's ticket. + * Otherwise, s->tlsext_ticket_expected is set to 0. */ - int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, - const unsigned char *limit, SSL_SESSION **ret) + const unsigned char *limit, SSL_SESSION **ret) { /* Point after session ID in client hello */ const unsigned char *p = session_id + len; unsigned short i; + *ret = NULL; + s->tlsext_ticket_expected = 0; + /* If tickets disabled behave as if no ticket present - * to permit stateful resumption. - */ + * to permit stateful resumption. + */ if (SSL_get_options(s) & SSL_OP_NO_TICKET) - return 1; - + return 0; if ((s->version <= SSL3_VERSION) || !limit) - return 1; + return 0; if (p >= limit) return -1; /* Skip past DTLS cookie */ @@ -1718,7 +2421,7 @@ int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, return -1; /* Now at start of extensions */ if ((p + 2) >= limit) - return 1; + return 0; n2s(p, i); while ((p + 4) <= limit) { @@ -1726,39 +2429,61 @@ int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, n2s(p, type); n2s(p, size); if (p + size > limit) - return 1; + return 0; if (type == TLSEXT_TYPE_session_ticket) { - /* If tickets disabled indicate cache miss which will - * trigger a full handshake - */ - if (SSL_get_options(s) & SSL_OP_NO_TICKET) - return 1; - /* If zero length note client will accept a ticket - * and indicate cache miss to trigger full handshake - */ + int r; if (size == 0) { + /* The client will accept a ticket but doesn't + * currently have one. */ s->tlsext_ticket_expected = 1; - return 0; /* Cache miss */ + return 1; } if (s->tls_session_secret_cb) { - /* Indicate cache miss here and instead of - * generating the session from ticket now, - * trigger abbreviated handshake based on - * external mechanism to calculate the master - * secret later. */ - return 0; + /* Indicate that the ticket couldn't be + * decrypted rather than generating the session + * from ticket now, trigger abbreviated + * handshake based on external mechanism to + * calculate the master secret later. */ + return 2; + } + r = tls_decrypt_ticket(s, p, size, session_id, len, ret); + switch (r) + { + case 2: /* ticket couldn't be decrypted */ + s->tlsext_ticket_expected = 1; + return 2; + case 3: /* ticket was decrypted */ + return r; + case 4: /* ticket decrypted but need to renew */ + s->tlsext_ticket_expected = 1; + return 3; + default: /* fatal error */ + return -1; } - return tls_decrypt_ticket(s, p, size, session_id, len, - ret); } p += size; } - return 1; + return 0; } +/* tls_decrypt_ticket attempts to decrypt a session ticket. + * + * etick: points to the body of the session ticket extension. + * eticklen: the length of the session tickets extenion. + * sess_id: points at the session ID. + * sesslen: the length of the session ID. + * psess: (output) on return, if a ticket was decrypted, then this is set to + * point to the resulting session. + * + * Returns: + * -1: fatal error, either from parsing or decrypting the ticket. + * 2: the ticket couldn't be decrypted. + * 3: a ticket was successfully decrypted and *psess was set. + * 4: same as 3, but the ticket needs to be renewed. + */ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, const unsigned char *sess_id, int sesslen, SSL_SESSION **psess) @@ -1773,7 +2498,7 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, SSL_CTX *tctx = s->initial_ctx; /* Need at least keyname + iv + some encrypted data */ if (eticklen < 48) - goto tickerr; + return 2; /* Initialize session ticket encryption and HMAC contexts */ HMAC_CTX_init(&hctx); EVP_CIPHER_CTX_init(&ctx); @@ -1785,7 +2510,7 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, if (rv < 0) return -1; if (rv == 0) - goto tickerr; + return 2; if (rv == 2) renew_ticket = 1; } @@ -1793,15 +2518,15 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, { /* Check key name matches */ if (memcmp(etick, tctx->tlsext_tick_key_name, 16)) - goto tickerr; + return 2; HMAC_Init_ex(&hctx, tctx->tlsext_tick_hmac_key, 16, tlsext_tick_md(), NULL); EVP_DecryptInit_ex(&ctx, EVP_aes_128_cbc(), NULL, tctx->tlsext_tick_aes_key, etick + 16); } /* Attempt to process session ticket, first conduct sanity and - * integrity checks on ticket. - */ + * integrity checks on ticket. + */ mlen = HMAC_size(&hctx); if (mlen < 0) { @@ -1813,8 +2538,8 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, HMAC_Update(&hctx, etick, eticklen); HMAC_Final(&hctx, tick_hmac, NULL); HMAC_CTX_cleanup(&hctx); - if (memcmp(tick_hmac, etick + eticklen, mlen)) - goto tickerr; + if (CRYPTO_memcmp(tick_hmac, etick + eticklen, mlen)) + return 2; /* Attempt to decrypt session data */ /* Move p after IV to start of encrypted ticket, update length */ p = etick + 16 + EVP_CIPHER_CTX_iv_length(&ctx); @@ -1827,33 +2552,373 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, int eticklen, } EVP_DecryptUpdate(&ctx, sdec, &slen, p, eticklen); if (EVP_DecryptFinal(&ctx, sdec + slen, &mlen) <= 0) - goto tickerr; + return 2; slen += mlen; EVP_CIPHER_CTX_cleanup(&ctx); p = sdec; - + sess = d2i_SSL_SESSION(NULL, &p, slen); OPENSSL_free(sdec); if (sess) { - /* The session ID if non-empty is used by some clients to - * detect that the ticket has been accepted. So we copy it to - * the session structure. If it is empty set length to zero - * as required by standard. - */ + /* The session ID, if non-empty, is used by some clients to + * detect that the ticket has been accepted. So we copy it to + * the session structure. If it is empty set length to zero + * as required by standard. + */ if (sesslen) memcpy(sess->session_id, sess_id, sesslen); sess->session_id_length = sesslen; *psess = sess; - s->tlsext_ticket_expected = renew_ticket; - return 1; + if (renew_ticket) + return 4; + else + return 3; } - /* If session decrypt failure indicate a cache miss and set state to - * send a new ticket - */ - tickerr: - s->tlsext_ticket_expected = 1; + ERR_clear_error(); + /* For session parse failure, indicate that we need to send a new + * ticket. */ + return 2; + } + +/* Tables to translate from NIDs to TLS v1.2 ids */ + +typedef struct + { + int nid; + int id; + } tls12_lookup; + +static tls12_lookup tls12_md[] = { +#ifndef OPENSSL_NO_MD5 + {NID_md5, TLSEXT_hash_md5}, +#endif +#ifndef OPENSSL_NO_SHA + {NID_sha1, TLSEXT_hash_sha1}, +#endif +#ifndef OPENSSL_NO_SHA256 + {NID_sha224, TLSEXT_hash_sha224}, + {NID_sha256, TLSEXT_hash_sha256}, +#endif +#ifndef OPENSSL_NO_SHA512 + {NID_sha384, TLSEXT_hash_sha384}, + {NID_sha512, TLSEXT_hash_sha512} +#endif +}; + +static tls12_lookup tls12_sig[] = { +#ifndef OPENSSL_NO_RSA + {EVP_PKEY_RSA, TLSEXT_signature_rsa}, +#endif +#ifndef OPENSSL_NO_DSA + {EVP_PKEY_DSA, TLSEXT_signature_dsa}, +#endif +#ifndef OPENSSL_NO_ECDSA + {EVP_PKEY_EC, TLSEXT_signature_ecdsa} +#endif +}; + +static int tls12_find_id(int nid, tls12_lookup *table, size_t tlen) + { + size_t i; + for (i = 0; i < tlen; i++) + { + if (table[i].nid == nid) + return table[i].id; + } + return -1; + } + +int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk, const EVP_MD *md) + { + int sig_id, md_id; + if (!md) + return 0; + md_id = tls12_find_id(EVP_MD_type(md), tls12_md, + sizeof(tls12_md)/sizeof(tls12_lookup)); + if (md_id == -1) + return 0; + sig_id = tls12_get_sigid(pk); + if (sig_id == -1) + return 0; + p[0] = (unsigned char)md_id; + p[1] = (unsigned char)sig_id; + return 1; + } + +/* tls12_get_sigid returns the TLS 1.2 SignatureAlgorithm value corresponding + * to the given public key, or -1 if not known. */ +int tls12_get_sigid(const EVP_PKEY *pk) + { + return tls12_find_id(pk->type, tls12_sig, + sizeof(tls12_sig)/sizeof(tls12_lookup)); + } + +const EVP_MD *tls12_get_hash(unsigned char hash_alg) + { + switch(hash_alg) + { +#ifndef OPENSSL_NO_SHA + case TLSEXT_hash_sha1: + return EVP_sha1(); +#endif +#ifndef OPENSSL_NO_SHA256 + case TLSEXT_hash_sha224: + return EVP_sha224(); + + case TLSEXT_hash_sha256: + return EVP_sha256(); +#endif +#ifndef OPENSSL_NO_SHA512 + case TLSEXT_hash_sha384: + return EVP_sha384(); + + case TLSEXT_hash_sha512: + return EVP_sha512(); +#endif + default: + return NULL; + + } + } + +/* tls1_process_sigalgs processes a signature_algorithms extension and sets the + * digest functions accordingly for each key type. + * + * See RFC 5246, section 7.4.1.4.1. + * + * data: points to the content of the extension, not including type and length + * headers. + * dsize: the number of bytes of |data|. Must be even. + */ +void tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize) + { + int i; + const EVP_MD *md, **digest_ptr; + /* Extension ignored for TLS versions below 1.2 */ + if (TLS1_get_version(s) < TLS1_2_VERSION) + return; + + s->s3->digest_rsa = NULL; + s->s3->digest_dsa = NULL; + s->s3->digest_ecdsa = NULL; + + for (i = 0; i < dsize; i += 2) + { + unsigned char hash_alg = data[i], sig_alg = data[i+1]; + + switch(sig_alg) + { +#ifndef OPENSSL_NO_RSA + case TLSEXT_signature_rsa: + digest_ptr = &s->s3->digest_rsa; + break; +#endif +#ifndef OPENSSL_NO_DSA + case TLSEXT_signature_dsa: + digest_ptr = &s->s3->digest_dsa; + break; +#endif +#ifndef OPENSSL_NO_ECDSA + case TLSEXT_signature_ecdsa: + digest_ptr = &s->s3->digest_ecdsa; + break; +#endif + default: + continue; + } + + if (*digest_ptr == NULL) + { + md = tls12_get_hash(hash_alg); + if (md) + *digest_ptr = md; + } + + } + } + +#endif + +#ifndef OPENSSL_NO_HEARTBEATS +int +tls1_process_heartbeat(SSL *s) + { + unsigned char *p = &s->s3->rrec.data[0], *pl; + unsigned short hbtype; + unsigned int payload; + unsigned int padding = 16; /* Use minimum padding */ + + if (s->msg_callback) + s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT, + &s->s3->rrec.data[0], s->s3->rrec.length, + s, s->msg_callback_arg); + + /* Read type and payload length first */ + if (1 + 2 + 16 > s->s3->rrec.length) + return 0; /* silently discard */ + hbtype = *p++; + n2s(p, payload); + if (1 + 2 + payload + 16 > s->s3->rrec.length) + return 0; /* silently discard per RFC 6520 sec. 4 */ + pl = p; + + if (hbtype == TLS1_HB_REQUEST) + { + unsigned char *buffer, *bp; + int r; + + /* Allocate memory for the response, size is 1 bytes + * message type, plus 2 bytes payload length, plus + * payload, plus padding + */ + buffer = OPENSSL_malloc(1 + 2 + payload + padding); + bp = buffer; + + /* Enter response type, length and copy payload */ + *bp++ = TLS1_HB_RESPONSE; + s2n(payload, bp); + memcpy(bp, pl, payload); + bp += payload; + /* Random padding */ + RAND_pseudo_bytes(bp, padding); + + r = ssl3_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, 3 + payload + padding); + + if (r >= 0 && s->msg_callback) + s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT, + buffer, 3 + payload + padding, + s, s->msg_callback_arg); + + OPENSSL_free(buffer); + + if (r < 0) + return r; + } + else if (hbtype == TLS1_HB_RESPONSE) + { + unsigned int seq; + + /* We only send sequence numbers (2 bytes unsigned int), + * and 16 random bytes, so we just try to read the + * sequence number */ + n2s(pl, seq); + + if (payload == 18 && seq == s->tlsext_hb_seq) + { + s->tlsext_hb_seq++; + s->tlsext_hb_pending = 0; + } + } + return 0; } +int +tls1_heartbeat(SSL *s) + { + unsigned char *buf, *p; + int ret; + unsigned int payload = 18; /* Sequence number + random bytes */ + unsigned int padding = 16; /* Use minimum padding */ + + /* Only send if peer supports and accepts HB requests... */ + if (!(s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) || + s->tlsext_heartbeat & SSL_TLSEXT_HB_DONT_SEND_REQUESTS) + { + SSLerr(SSL_F_TLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT); + return -1; + } + + /* ...and there is none in flight yet... */ + if (s->tlsext_hb_pending) + { + SSLerr(SSL_F_TLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PENDING); + return -1; + } + + /* ...and no handshake in progress. */ + if (SSL_in_init(s) || s->in_handshake) + { + SSLerr(SSL_F_TLS1_HEARTBEAT,SSL_R_UNEXPECTED_MESSAGE); + return -1; + } + + /* Check if padding is too long, payload and padding + * must not exceed 2^14 - 3 = 16381 bytes in total. + */ + OPENSSL_assert(payload + padding <= 16381); + + /* Create HeartBeat message, we just use a sequence number + * as payload to distuingish different messages and add + * some random stuff. + * - Message Type, 1 byte + * - Payload Length, 2 bytes (unsigned int) + * - Payload, the sequence number (2 bytes uint) + * - Payload, random bytes (16 bytes uint) + * - Padding + */ + buf = OPENSSL_malloc(1 + 2 + payload + padding); + p = buf; + /* Message Type */ + *p++ = TLS1_HB_REQUEST; + /* Payload length (18 bytes here) */ + s2n(payload, p); + /* Sequence number */ + s2n(s->tlsext_hb_seq, p); + /* 16 random bytes */ + RAND_pseudo_bytes(p, 16); + p += 16; + /* Random padding */ + RAND_pseudo_bytes(p, padding); + + ret = ssl3_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding); + if (ret >= 0) + { + if (s->msg_callback) + s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT, + buf, 3 + payload + padding, + s, s->msg_callback_arg); + + s->tlsext_hb_pending = 1; + } + + OPENSSL_free(buf); + + return ret; + } +#endif + +#if !defined(OPENSSL_NO_TLSEXT) +/* tls1_channel_id_hash calculates the signed data for a Channel ID on the given + * SSL connection and writes it to |md|. + */ +int +tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s) + { + EVP_MD_CTX ctx; + unsigned char temp_digest[EVP_MAX_MD_SIZE]; + unsigned temp_digest_len; + int i; + static const char kClientIDMagic[] = "TLS Channel ID signature"; + + if (s->s3->handshake_buffer) + if (!ssl3_digest_cached_records(s)) + return 0; + + EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic)); + + EVP_MD_CTX_init(&ctx); + for (i = 0; i < SSL_MAX_DIGEST; i++) + { + if (s->s3->handshake_dgst[i] == NULL) + continue; + EVP_MD_CTX_copy_ex(&ctx, s->s3->handshake_dgst[i]); + EVP_DigestFinal_ex(&ctx, temp_digest, &temp_digest_len); + EVP_DigestUpdate(md, temp_digest, temp_digest_len); + } + EVP_MD_CTX_cleanup(&ctx); + + return 1; + } #endif diff --git a/app/openssl/ssl/t1_meth.c b/app/openssl/ssl/t1_meth.c index 6ce7c0bb..53c807de 100644 --- a/app/openssl/ssl/t1_meth.c +++ b/app/openssl/ssl/t1_meth.c @@ -60,16 +60,28 @@ #include #include "ssl_locl.h" -static const SSL_METHOD *tls1_get_method(int ver); static const SSL_METHOD *tls1_get_method(int ver) { + if (ver == TLS1_2_VERSION) + return TLSv1_2_method(); + if (ver == TLS1_1_VERSION) + return TLSv1_1_method(); if (ver == TLS1_VERSION) - return(TLSv1_method()); - else - return(NULL); + return TLSv1_method(); + return NULL; } -IMPLEMENT_tls1_meth_func(TLSv1_method, +IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_method, + ssl3_accept, + ssl3_connect, + tls1_get_method) + +IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_method, + ssl3_accept, + ssl3_connect, + tls1_get_method) + +IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_method, ssl3_accept, ssl3_connect, tls1_get_method) diff --git a/app/openssl/ssl/t1_srvr.c b/app/openssl/ssl/t1_srvr.c index 42525e9e..f1d15657 100644 --- a/app/openssl/ssl/t1_srvr.c +++ b/app/openssl/ssl/t1_srvr.c @@ -67,13 +67,26 @@ static const SSL_METHOD *tls1_get_server_method(int ver); static const SSL_METHOD *tls1_get_server_method(int ver) { + if (ver == TLS1_2_VERSION) + return TLSv1_2_server_method(); + if (ver == TLS1_1_VERSION) + return TLSv1_1_server_method(); if (ver == TLS1_VERSION) - return(TLSv1_server_method()); - else - return(NULL); + return TLSv1_server_method(); + return NULL; } -IMPLEMENT_tls1_meth_func(TLSv1_server_method, +IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_server_method, + ssl3_accept, + ssl_undefined_function, + tls1_get_server_method) + +IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_server_method, + ssl3_accept, + ssl_undefined_function, + tls1_get_server_method) + +IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_server_method, ssl3_accept, ssl_undefined_function, tls1_get_server_method) diff --git a/app/openssl/ssl/tls1.h b/app/openssl/ssl/tls1.h index 76f368ac..ec8948d5 100644 --- a/app/openssl/ssl/tls1.h +++ b/app/openssl/ssl/tls1.h @@ -159,10 +159,24 @@ extern "C" { #define TLS1_ALLOW_EXPERIMENTAL_CIPHERSUITES 0 +#define TLS1_2_VERSION 0x0303 +#define TLS1_2_VERSION_MAJOR 0x03 +#define TLS1_2_VERSION_MINOR 0x03 + +#define TLS1_1_VERSION 0x0302 +#define TLS1_1_VERSION_MAJOR 0x03 +#define TLS1_1_VERSION_MINOR 0x02 + #define TLS1_VERSION 0x0301 #define TLS1_VERSION_MAJOR 0x03 #define TLS1_VERSION_MINOR 0x01 +#define TLS1_get_version(s) \ + ((s->version >> 8) == TLS1_VERSION_MAJOR ? s->version : 0) + +#define TLS1_get_client_version(s) \ + ((s->client_version >> 8) == TLS1_VERSION_MAJOR ? s->client_version : 0) + #define TLS1_AD_DECRYPTION_FAILED 21 #define TLS1_AD_RECORD_OVERFLOW 22 #define TLS1_AD_UNKNOWN_CA 48 /* fatal */ @@ -183,17 +197,51 @@ extern "C" { #define TLS1_AD_BAD_CERTIFICATE_HASH_VALUE 114 #define TLS1_AD_UNKNOWN_PSK_IDENTITY 115 /* fatal */ -/* ExtensionType values from RFC3546 / RFC4366 */ +/* ExtensionType values from RFC3546 / RFC4366 / RFC6066 */ #define TLSEXT_TYPE_server_name 0 #define TLSEXT_TYPE_max_fragment_length 1 #define TLSEXT_TYPE_client_certificate_url 2 #define TLSEXT_TYPE_trusted_ca_keys 3 #define TLSEXT_TYPE_truncated_hmac 4 #define TLSEXT_TYPE_status_request 5 +/* ExtensionType values from RFC4681 */ +#define TLSEXT_TYPE_user_mapping 6 + +/* ExtensionType values from RFC5878 */ +#define TLSEXT_TYPE_client_authz 7 +#define TLSEXT_TYPE_server_authz 8 + +/* ExtensionType values from RFC6091 */ +#define TLSEXT_TYPE_cert_type 9 + /* ExtensionType values from RFC4492 */ #define TLSEXT_TYPE_elliptic_curves 10 #define TLSEXT_TYPE_ec_point_formats 11 + +/* ExtensionType value from RFC5054 */ +#define TLSEXT_TYPE_srp 12 + +/* ExtensionType values from RFC5246 */ +#define TLSEXT_TYPE_signature_algorithms 13 + +/* ExtensionType value from RFC5764 */ +#define TLSEXT_TYPE_use_srtp 14 + +/* ExtensionType value from RFC5620 */ +#define TLSEXT_TYPE_heartbeat 15 + +/* ExtensionType value for TLS padding extension. + * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml + * http://tools.ietf.org/html/draft-agl-tls-padding-03 + */ +#define TLSEXT_TYPE_padding 21 + +/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */ +#define TLSEXT_TYPE_application_layer_protocol_negotiation 16 + +/* ExtensionType value from RFC4507 */ #define TLSEXT_TYPE_session_ticket 35 + /* ExtensionType value from draft-rescorla-tls-opaque-prf-input-00.txt */ #if 0 /* will have to be provided externally for now , * i.e. build with -DTLSEXT_TYPE_opaque_prf_input=38183 @@ -209,6 +257,9 @@ extern "C" { #define TLSEXT_TYPE_next_proto_neg 13172 #endif +/* This is not an IANA defined extension number */ +#define TLSEXT_TYPE_channel_id 30031 + /* NameType value from RFC 3546 */ #define TLSEXT_NAMETYPE_host_name 0 /* status request value from RFC 3546 */ @@ -221,12 +272,37 @@ extern "C" { #define TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2 2 #define TLSEXT_ECPOINTFORMAT_last 2 +/* Signature and hash algorithms from RFC 5246 */ + +#define TLSEXT_signature_anonymous 0 +#define TLSEXT_signature_rsa 1 +#define TLSEXT_signature_dsa 2 +#define TLSEXT_signature_ecdsa 3 + +#define TLSEXT_hash_none 0 +#define TLSEXT_hash_md5 1 +#define TLSEXT_hash_sha1 2 +#define TLSEXT_hash_sha224 3 +#define TLSEXT_hash_sha256 4 +#define TLSEXT_hash_sha384 5 +#define TLSEXT_hash_sha512 6 + #ifndef OPENSSL_NO_TLSEXT #define TLSEXT_MAXLEN_host_name 255 -const char *SSL_get_servername(const SSL *s, const int type) ; -int SSL_get_servername_type(const SSL *s) ; +const char *SSL_get_servername(const SSL *s, const int type); +int SSL_get_servername_type(const SSL *s); +/* SSL_export_keying_material exports a value derived from the master secret, + * as specified in RFC 5705. It writes |olen| bytes to |out| given a label and + * optional context. (Since a zero length context is allowed, the |use_context| + * flag controls whether a context is included.) + * + * It returns 1 on success and zero otherwise. + */ +int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, + const char *label, size_t llen, const unsigned char *p, size_t plen, + int use_context); #define SSL_set_tlsext_host_name(s,name) \ SSL_ctrl(s,SSL_CTRL_SET_TLSEXT_HOSTNAME,TLSEXT_NAMETYPE_host_name,(char *)name) @@ -290,6 +366,16 @@ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG, 0, arg) #define SSL_CTX_set_tlsext_ticket_key_cb(ssl, cb) \ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) +#ifndef OPENSSL_NO_HEARTBEATS +#define SSL_TLSEXT_HB_ENABLED 0x01 +#define SSL_TLSEXT_HB_DONT_SEND_REQUESTS 0x02 +#define SSL_TLSEXT_HB_DONT_RECV_REQUESTS 0x04 + +#define SSL_get_tlsext_heartbeat_pending(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING,0,NULL) +#define SSL_set_tlsext_heartbeat_no_requests(ssl, arg) \ + SSL_ctrl((ssl),SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS,arg,NULL) +#endif #endif /* PSK ciphersuites from 4279 */ @@ -327,6 +413,14 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_AES_256_SHA 0x03000039 #define TLS1_CK_ADH_WITH_AES_256_SHA 0x0300003A +/* TLS v1.2 ciphersuites */ +#define TLS1_CK_RSA_WITH_NULL_SHA256 0x0300003B +#define TLS1_CK_RSA_WITH_AES_128_SHA256 0x0300003C +#define TLS1_CK_RSA_WITH_AES_256_SHA256 0x0300003D +#define TLS1_CK_DH_DSS_WITH_AES_128_SHA256 0x0300003E +#define TLS1_CK_DH_RSA_WITH_AES_128_SHA256 0x0300003F +#define TLS1_CK_DHE_DSS_WITH_AES_128_SHA256 0x03000040 + /* Camellia ciphersuites from RFC4132 */ #define TLS1_CK_RSA_WITH_CAMELLIA_128_CBC_SHA 0x03000041 #define TLS1_CK_DH_DSS_WITH_CAMELLIA_128_CBC_SHA 0x03000042 @@ -335,6 +429,16 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA 0x03000045 #define TLS1_CK_ADH_WITH_CAMELLIA_128_CBC_SHA 0x03000046 +/* TLS v1.2 ciphersuites */ +#define TLS1_CK_DHE_RSA_WITH_AES_128_SHA256 0x03000067 +#define TLS1_CK_DH_DSS_WITH_AES_256_SHA256 0x03000068 +#define TLS1_CK_DH_RSA_WITH_AES_256_SHA256 0x03000069 +#define TLS1_CK_DHE_DSS_WITH_AES_256_SHA256 0x0300006A +#define TLS1_CK_DHE_RSA_WITH_AES_256_SHA256 0x0300006B +#define TLS1_CK_ADH_WITH_AES_128_SHA256 0x0300006C +#define TLS1_CK_ADH_WITH_AES_256_SHA256 0x0300006D + +/* Camellia ciphersuites from RFC4132 */ #define TLS1_CK_RSA_WITH_CAMELLIA_256_CBC_SHA 0x03000084 #define TLS1_CK_DH_DSS_WITH_CAMELLIA_256_CBC_SHA 0x03000085 #define TLS1_CK_DH_RSA_WITH_CAMELLIA_256_CBC_SHA 0x03000086 @@ -350,6 +454,20 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_DHE_RSA_WITH_SEED_SHA 0x0300009A #define TLS1_CK_ADH_WITH_SEED_SHA 0x0300009B +/* TLS v1.2 GCM ciphersuites from RFC5288 */ +#define TLS1_CK_RSA_WITH_AES_128_GCM_SHA256 0x0300009C +#define TLS1_CK_RSA_WITH_AES_256_GCM_SHA384 0x0300009D +#define TLS1_CK_DHE_RSA_WITH_AES_128_GCM_SHA256 0x0300009E +#define TLS1_CK_DHE_RSA_WITH_AES_256_GCM_SHA384 0x0300009F +#define TLS1_CK_DH_RSA_WITH_AES_128_GCM_SHA256 0x030000A0 +#define TLS1_CK_DH_RSA_WITH_AES_256_GCM_SHA384 0x030000A1 +#define TLS1_CK_DHE_DSS_WITH_AES_128_GCM_SHA256 0x030000A2 +#define TLS1_CK_DHE_DSS_WITH_AES_256_GCM_SHA384 0x030000A3 +#define TLS1_CK_DH_DSS_WITH_AES_128_GCM_SHA256 0x030000A4 +#define TLS1_CK_DH_DSS_WITH_AES_256_GCM_SHA384 0x030000A5 +#define TLS1_CK_ADH_WITH_AES_128_GCM_SHA256 0x030000A6 +#define TLS1_CK_ADH_WITH_AES_256_GCM_SHA384 0x030000A7 + /* ECC ciphersuites from draft-ietf-tls-ecc-12.txt with changes soon to be in draft 13 */ #define TLS1_CK_ECDH_ECDSA_WITH_NULL_SHA 0x0300C001 #define TLS1_CK_ECDH_ECDSA_WITH_RC4_128_SHA 0x0300C002 @@ -381,6 +499,42 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_CK_ECDH_anon_WITH_AES_128_CBC_SHA 0x0300C018 #define TLS1_CK_ECDH_anon_WITH_AES_256_CBC_SHA 0x0300C019 +/* SRP ciphersuites from RFC 5054 */ +#define TLS1_CK_SRP_SHA_WITH_3DES_EDE_CBC_SHA 0x0300C01A +#define TLS1_CK_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA 0x0300C01B +#define TLS1_CK_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA 0x0300C01C +#define TLS1_CK_SRP_SHA_WITH_AES_128_CBC_SHA 0x0300C01D +#define TLS1_CK_SRP_SHA_RSA_WITH_AES_128_CBC_SHA 0x0300C01E +#define TLS1_CK_SRP_SHA_DSS_WITH_AES_128_CBC_SHA 0x0300C01F +#define TLS1_CK_SRP_SHA_WITH_AES_256_CBC_SHA 0x0300C020 +#define TLS1_CK_SRP_SHA_RSA_WITH_AES_256_CBC_SHA 0x0300C021 +#define TLS1_CK_SRP_SHA_DSS_WITH_AES_256_CBC_SHA 0x0300C022 + +/* ECDH HMAC based ciphersuites from RFC5289 */ + +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_SHA256 0x0300C023 +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_256_SHA384 0x0300C024 +#define TLS1_CK_ECDH_ECDSA_WITH_AES_128_SHA256 0x0300C025 +#define TLS1_CK_ECDH_ECDSA_WITH_AES_256_SHA384 0x0300C026 +#define TLS1_CK_ECDHE_RSA_WITH_AES_128_SHA256 0x0300C027 +#define TLS1_CK_ECDHE_RSA_WITH_AES_256_SHA384 0x0300C028 +#define TLS1_CK_ECDH_RSA_WITH_AES_128_SHA256 0x0300C029 +#define TLS1_CK_ECDH_RSA_WITH_AES_256_SHA384 0x0300C02A + +/* ECDH GCM based ciphersuites from RFC5289 */ +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 0x0300C02B +#define TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 0x0300C02C +#define TLS1_CK_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 0x0300C02D +#define TLS1_CK_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 0x0300C02E +#define TLS1_CK_ECDHE_RSA_WITH_AES_128_GCM_SHA256 0x0300C02F +#define TLS1_CK_ECDHE_RSA_WITH_AES_256_GCM_SHA384 0x0300C030 +#define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 +#define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 + +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037 +#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038 + /* XXX * Inconsistency alert: * The OpenSSL names of ciphers with ephemeral DH here include the string @@ -448,6 +602,17 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_PSK_WITH_AES_128_CBC_SHA "PSK-AES128-CBC-SHA" #define TLS1_TXT_PSK_WITH_AES_256_CBC_SHA "PSK-AES256-CBC-SHA" +/* SRP ciphersuite from RFC 5054 */ +#define TLS1_TXT_SRP_SHA_WITH_3DES_EDE_CBC_SHA "SRP-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA "SRP-RSA-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA "SRP-DSS-3DES-EDE-CBC-SHA" +#define TLS1_TXT_SRP_SHA_WITH_AES_128_CBC_SHA "SRP-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_AES_128_CBC_SHA "SRP-RSA-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_AES_128_CBC_SHA "SRP-DSS-AES-128-CBC-SHA" +#define TLS1_TXT_SRP_SHA_WITH_AES_256_CBC_SHA "SRP-AES-256-CBC-SHA" +#define TLS1_TXT_SRP_SHA_RSA_WITH_AES_256_CBC_SHA "SRP-RSA-AES-256-CBC-SHA" +#define TLS1_TXT_SRP_SHA_DSS_WITH_AES_256_CBC_SHA "SRP-DSS-AES-256-CBC-SHA" + /* Camellia ciphersuites from RFC4132 */ #define TLS1_TXT_RSA_WITH_CAMELLIA_128_CBC_SHA "CAMELLIA128-SHA" #define TLS1_TXT_DH_DSS_WITH_CAMELLIA_128_CBC_SHA "DH-DSS-CAMELLIA128-SHA" @@ -471,6 +636,59 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) #define TLS1_TXT_DHE_RSA_WITH_SEED_SHA "DHE-RSA-SEED-SHA" #define TLS1_TXT_ADH_WITH_SEED_SHA "ADH-SEED-SHA" +/* TLS v1.2 ciphersuites */ +#define TLS1_TXT_RSA_WITH_NULL_SHA256 "NULL-SHA256" +#define TLS1_TXT_RSA_WITH_AES_128_SHA256 "AES128-SHA256" +#define TLS1_TXT_RSA_WITH_AES_256_SHA256 "AES256-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_128_SHA256 "DH-DSS-AES128-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_128_SHA256 "DH-RSA-AES128-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_128_SHA256 "DHE-DSS-AES128-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_128_SHA256 "DHE-RSA-AES128-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_256_SHA256 "DH-DSS-AES256-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_256_SHA256 "DH-RSA-AES256-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_256_SHA256 "DHE-DSS-AES256-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_256_SHA256 "DHE-RSA-AES256-SHA256" +#define TLS1_TXT_ADH_WITH_AES_128_SHA256 "ADH-AES128-SHA256" +#define TLS1_TXT_ADH_WITH_AES_256_SHA256 "ADH-AES256-SHA256" + +/* TLS v1.2 GCM ciphersuites from RFC5288 */ +#define TLS1_TXT_RSA_WITH_AES_128_GCM_SHA256 "AES128-GCM-SHA256" +#define TLS1_TXT_RSA_WITH_AES_256_GCM_SHA384 "AES256-GCM-SHA384" +#define TLS1_TXT_DHE_RSA_WITH_AES_128_GCM_SHA256 "DHE-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_DHE_RSA_WITH_AES_256_GCM_SHA384 "DHE-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_DH_RSA_WITH_AES_128_GCM_SHA256 "DH-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_DH_RSA_WITH_AES_256_GCM_SHA384 "DH-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_DHE_DSS_WITH_AES_128_GCM_SHA256 "DHE-DSS-AES128-GCM-SHA256" +#define TLS1_TXT_DHE_DSS_WITH_AES_256_GCM_SHA384 "DHE-DSS-AES256-GCM-SHA384" +#define TLS1_TXT_DH_DSS_WITH_AES_128_GCM_SHA256 "DH-DSS-AES128-GCM-SHA256" +#define TLS1_TXT_DH_DSS_WITH_AES_256_GCM_SHA384 "DH-DSS-AES256-GCM-SHA384" +#define TLS1_TXT_ADH_WITH_AES_128_GCM_SHA256 "ADH-AES128-GCM-SHA256" +#define TLS1_TXT_ADH_WITH_AES_256_GCM_SHA384 "ADH-AES256-GCM-SHA384" + +/* ECDH HMAC based ciphersuites from RFC5289 */ + +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_SHA256 "ECDHE-ECDSA-AES128-SHA256" +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_SHA384 "ECDHE-ECDSA-AES256-SHA384" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_128_SHA256 "ECDH-ECDSA-AES128-SHA256" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_256_SHA384 "ECDH-ECDSA-AES256-SHA384" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_128_SHA256 "ECDHE-RSA-AES128-SHA256" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_256_SHA384 "ECDHE-RSA-AES256-SHA384" +#define TLS1_TXT_ECDH_RSA_WITH_AES_128_SHA256 "ECDH-RSA-AES128-SHA256" +#define TLS1_TXT_ECDH_RSA_WITH_AES_256_SHA384 "ECDH-RSA-AES256-SHA384" + +/* ECDH GCM based ciphersuites from RFC5289 */ +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 "ECDHE-ECDSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 "ECDHE-ECDSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 "ECDH-ECDSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 "ECDH-ECDSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_128_GCM_SHA256 "ECDHE-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDHE_RSA_WITH_AES_256_GCM_SHA384 "ECDHE-RSA-AES256-GCM-SHA384" +#define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" +#define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" + +/* ECDHE PSK ciphersuites from RFC 5489 */ +#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256" +#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384" #define TLS_CT_RSA_SIGN 1 #define TLS_CT_DSS_SIGN 2 diff --git a/app/openssl/ssl/tls_srp.c b/app/openssl/ssl/tls_srp.c new file mode 100644 index 00000000..2315a7c0 --- /dev/null +++ b/app/openssl/ssl/tls_srp.c @@ -0,0 +1,507 @@ +/* ssl/tls_srp.c */ +/* Written by Christophe Renou (christophe.renou@edelweb.fr) with + * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr) + * for the EdelKey project and contributed to the OpenSSL project 2004. + */ +/* ==================================================================== + * Copyright (c) 2004-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +#include "ssl_locl.h" +#ifndef OPENSSL_NO_SRP + +#include +#include +#include + +int SSL_CTX_SRP_CTX_free(struct ssl_ctx_st *ctx) + { + if (ctx == NULL) + return 0; + OPENSSL_free(ctx->srp_ctx.login); + BN_free(ctx->srp_ctx.N); + BN_free(ctx->srp_ctx.g); + BN_free(ctx->srp_ctx.s); + BN_free(ctx->srp_ctx.B); + BN_free(ctx->srp_ctx.A); + BN_free(ctx->srp_ctx.a); + BN_free(ctx->srp_ctx.b); + BN_free(ctx->srp_ctx.v); + ctx->srp_ctx.TLS_ext_srp_username_callback = NULL; + ctx->srp_ctx.SRP_cb_arg = NULL; + ctx->srp_ctx.SRP_verify_param_callback = NULL; + ctx->srp_ctx.SRP_give_srp_client_pwd_callback = NULL; + ctx->srp_ctx.N = NULL; + ctx->srp_ctx.g = NULL; + ctx->srp_ctx.s = NULL; + ctx->srp_ctx.B = NULL; + ctx->srp_ctx.A = NULL; + ctx->srp_ctx.a = NULL; + ctx->srp_ctx.b = NULL; + ctx->srp_ctx.v = NULL; + ctx->srp_ctx.login = NULL; + ctx->srp_ctx.info = NULL; + ctx->srp_ctx.strength = SRP_MINIMAL_N; + ctx->srp_ctx.srp_Mask = 0; + return (1); + } + +int SSL_SRP_CTX_free(struct ssl_st *s) + { + if (s == NULL) + return 0; + OPENSSL_free(s->srp_ctx.login); + BN_free(s->srp_ctx.N); + BN_free(s->srp_ctx.g); + BN_free(s->srp_ctx.s); + BN_free(s->srp_ctx.B); + BN_free(s->srp_ctx.A); + BN_free(s->srp_ctx.a); + BN_free(s->srp_ctx.b); + BN_free(s->srp_ctx.v); + s->srp_ctx.TLS_ext_srp_username_callback = NULL; + s->srp_ctx.SRP_cb_arg = NULL; + s->srp_ctx.SRP_verify_param_callback = NULL; + s->srp_ctx.SRP_give_srp_client_pwd_callback = NULL; + s->srp_ctx.N = NULL; + s->srp_ctx.g = NULL; + s->srp_ctx.s = NULL; + s->srp_ctx.B = NULL; + s->srp_ctx.A = NULL; + s->srp_ctx.a = NULL; + s->srp_ctx.b = NULL; + s->srp_ctx.v = NULL; + s->srp_ctx.login = NULL; + s->srp_ctx.info = NULL; + s->srp_ctx.strength = SRP_MINIMAL_N; + s->srp_ctx.srp_Mask = 0; + return (1); + } + +int SSL_SRP_CTX_init(struct ssl_st *s) + { + SSL_CTX *ctx; + + if ((s == NULL) || ((ctx = s->ctx) == NULL)) + return 0; + s->srp_ctx.SRP_cb_arg = ctx->srp_ctx.SRP_cb_arg; + /* set client Hello login callback */ + s->srp_ctx.TLS_ext_srp_username_callback = ctx->srp_ctx.TLS_ext_srp_username_callback; + /* set SRP N/g param callback for verification */ + s->srp_ctx.SRP_verify_param_callback = ctx->srp_ctx.SRP_verify_param_callback; + /* set SRP client passwd callback */ + s->srp_ctx.SRP_give_srp_client_pwd_callback = ctx->srp_ctx.SRP_give_srp_client_pwd_callback; + + s->srp_ctx.N = NULL; + s->srp_ctx.g = NULL; + s->srp_ctx.s = NULL; + s->srp_ctx.B = NULL; + s->srp_ctx.A = NULL; + s->srp_ctx.a = NULL; + s->srp_ctx.b = NULL; + s->srp_ctx.v = NULL; + s->srp_ctx.login = NULL; + s->srp_ctx.info = ctx->srp_ctx.info; + s->srp_ctx.strength = ctx->srp_ctx.strength; + + if (((ctx->srp_ctx.N != NULL) && + ((s->srp_ctx.N = BN_dup(ctx->srp_ctx.N)) == NULL)) || + ((ctx->srp_ctx.g != NULL) && + ((s->srp_ctx.g = BN_dup(ctx->srp_ctx.g)) == NULL)) || + ((ctx->srp_ctx.s != NULL) && + ((s->srp_ctx.s = BN_dup(ctx->srp_ctx.s)) == NULL)) || + ((ctx->srp_ctx.B != NULL) && + ((s->srp_ctx.B = BN_dup(ctx->srp_ctx.B)) == NULL)) || + ((ctx->srp_ctx.A != NULL) && + ((s->srp_ctx.A = BN_dup(ctx->srp_ctx.A)) == NULL)) || + ((ctx->srp_ctx.a != NULL) && + ((s->srp_ctx.a = BN_dup(ctx->srp_ctx.a)) == NULL)) || + ((ctx->srp_ctx.v != NULL) && + ((s->srp_ctx.v = BN_dup(ctx->srp_ctx.v)) == NULL)) || + ((ctx->srp_ctx.b != NULL) && + ((s->srp_ctx.b = BN_dup(ctx->srp_ctx.b)) == NULL))) + { + SSLerr(SSL_F_SSL_SRP_CTX_INIT,ERR_R_BN_LIB); + goto err; + } + if ((ctx->srp_ctx.login != NULL) && + ((s->srp_ctx.login = BUF_strdup(ctx->srp_ctx.login)) == NULL)) + { + SSLerr(SSL_F_SSL_SRP_CTX_INIT,ERR_R_INTERNAL_ERROR); + goto err; + } + s->srp_ctx.srp_Mask = ctx->srp_ctx.srp_Mask; + + return (1); +err: + OPENSSL_free(s->srp_ctx.login); + BN_free(s->srp_ctx.N); + BN_free(s->srp_ctx.g); + BN_free(s->srp_ctx.s); + BN_free(s->srp_ctx.B); + BN_free(s->srp_ctx.A); + BN_free(s->srp_ctx.a); + BN_free(s->srp_ctx.b); + BN_free(s->srp_ctx.v); + return (0); + } + +int SSL_CTX_SRP_CTX_init(struct ssl_ctx_st *ctx) + { + if (ctx == NULL) + return 0; + + ctx->srp_ctx.SRP_cb_arg = NULL; + /* set client Hello login callback */ + ctx->srp_ctx.TLS_ext_srp_username_callback = NULL; + /* set SRP N/g param callback for verification */ + ctx->srp_ctx.SRP_verify_param_callback = NULL; + /* set SRP client passwd callback */ + ctx->srp_ctx.SRP_give_srp_client_pwd_callback = NULL; + + ctx->srp_ctx.N = NULL; + ctx->srp_ctx.g = NULL; + ctx->srp_ctx.s = NULL; + ctx->srp_ctx.B = NULL; + ctx->srp_ctx.A = NULL; + ctx->srp_ctx.a = NULL; + ctx->srp_ctx.b = NULL; + ctx->srp_ctx.v = NULL; + ctx->srp_ctx.login = NULL; + ctx->srp_ctx.srp_Mask = 0; + ctx->srp_ctx.info = NULL; + ctx->srp_ctx.strength = SRP_MINIMAL_N; + + return (1); + } + +/* server side */ +int SSL_srp_server_param_with_username(SSL *s, int *ad) + { + unsigned char b[SSL_MAX_MASTER_KEY_LENGTH]; + int al; + + *ad = SSL_AD_UNKNOWN_PSK_IDENTITY; + if ((s->srp_ctx.TLS_ext_srp_username_callback !=NULL) && + ((al = s->srp_ctx.TLS_ext_srp_username_callback(s, ad, s->srp_ctx.SRP_cb_arg))!=SSL_ERROR_NONE)) + return al; + + *ad = SSL_AD_INTERNAL_ERROR; + if ((s->srp_ctx.N == NULL) || + (s->srp_ctx.g == NULL) || + (s->srp_ctx.s == NULL) || + (s->srp_ctx.v == NULL)) + return SSL3_AL_FATAL; + + if (RAND_bytes(b, sizeof(b)) <= 0) + return SSL3_AL_FATAL; + s->srp_ctx.b = BN_bin2bn(b,sizeof(b),NULL); + OPENSSL_cleanse(b,sizeof(b)); + + /* Calculate: B = (kv + g^b) % N */ + + return ((s->srp_ctx.B = SRP_Calc_B(s->srp_ctx.b, s->srp_ctx.N, s->srp_ctx.g, s->srp_ctx.v)) != NULL)? + SSL_ERROR_NONE:SSL3_AL_FATAL; + } + +/* If the server just has the raw password, make up a verifier entry on the fly */ +int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, const char *grp) + { + SRP_gN *GN = SRP_get_default_gN(grp); + if(GN == NULL) return -1; + s->srp_ctx.N = BN_dup(GN->N); + s->srp_ctx.g = BN_dup(GN->g); + if(s->srp_ctx.v != NULL) + { + BN_clear_free(s->srp_ctx.v); + s->srp_ctx.v = NULL; + } + if(s->srp_ctx.s != NULL) + { + BN_clear_free(s->srp_ctx.s); + s->srp_ctx.s = NULL; + } + if(!SRP_create_verifier_BN(user, pass, &s->srp_ctx.s, &s->srp_ctx.v, GN->N, GN->g)) return -1; + + return 1; + } + +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char *info) + { + if (N!= NULL) + { + if (s->srp_ctx.N != NULL) + { + if (!BN_copy(s->srp_ctx.N,N)) + { + BN_free(s->srp_ctx.N); + s->srp_ctx.N = NULL; + } + } + else + s->srp_ctx.N = BN_dup(N); + } + if (g!= NULL) + { + if (s->srp_ctx.g != NULL) + { + if (!BN_copy(s->srp_ctx.g,g)) + { + BN_free(s->srp_ctx.g); + s->srp_ctx.g = NULL; + } + } + else + s->srp_ctx.g = BN_dup(g); + } + if (sa!= NULL) + { + if (s->srp_ctx.s != NULL) + { + if (!BN_copy(s->srp_ctx.s,sa)) + { + BN_free(s->srp_ctx.s); + s->srp_ctx.s = NULL; + } + } + else + s->srp_ctx.s = BN_dup(sa); + } + if (v!= NULL) + { + if (s->srp_ctx.v != NULL) + { + if (!BN_copy(s->srp_ctx.v,v)) + { + BN_free(s->srp_ctx.v); + s->srp_ctx.v = NULL; + } + } + else + s->srp_ctx.v = BN_dup(v); + } + s->srp_ctx.info = info; + + if (!(s->srp_ctx.N) || + !(s->srp_ctx.g) || + !(s->srp_ctx.s) || + !(s->srp_ctx.v)) + return -1; + + return 1; + } + +int SRP_generate_server_master_secret(SSL *s,unsigned char *master_key) + { + BIGNUM *K = NULL, *u = NULL; + int ret = -1, tmp_len; + unsigned char *tmp = NULL; + + if (!SRP_Verify_A_mod_N(s->srp_ctx.A,s->srp_ctx.N)) + goto err; + if (!(u = SRP_Calc_u(s->srp_ctx.A,s->srp_ctx.B,s->srp_ctx.N))) + goto err; + if (!(K = SRP_Calc_server_key(s->srp_ctx.A, s->srp_ctx.v, u, s->srp_ctx.b, s->srp_ctx.N))) + goto err; + + tmp_len = BN_num_bytes(K); + if ((tmp = OPENSSL_malloc(tmp_len)) == NULL) + goto err; + BN_bn2bin(K, tmp); + ret = s->method->ssl3_enc->generate_master_secret(s,master_key,tmp,tmp_len); +err: + if (tmp) + { + OPENSSL_cleanse(tmp,tmp_len) ; + OPENSSL_free(tmp); + } + BN_clear_free(K); + BN_clear_free(u); + return ret; + } + +/* client side */ +int SRP_generate_client_master_secret(SSL *s,unsigned char *master_key) + { + BIGNUM *x = NULL, *u = NULL, *K = NULL; + int ret = -1, tmp_len; + char *passwd = NULL; + unsigned char *tmp = NULL; + + /* Checks if b % n == 0 + */ + if (SRP_Verify_B_mod_N(s->srp_ctx.B,s->srp_ctx.N)==0) goto err; + if (!(u = SRP_Calc_u(s->srp_ctx.A,s->srp_ctx.B,s->srp_ctx.N))) goto err; + if (s->srp_ctx.SRP_give_srp_client_pwd_callback == NULL) goto err; + if (!(passwd = s->srp_ctx.SRP_give_srp_client_pwd_callback(s, s->srp_ctx.SRP_cb_arg))) goto err; + if (!(x = SRP_Calc_x(s->srp_ctx.s,s->srp_ctx.login,passwd))) goto err; + if (!(K = SRP_Calc_client_key(s->srp_ctx.N, s->srp_ctx.B, s->srp_ctx.g, x, s->srp_ctx.a, u))) goto err; + + tmp_len = BN_num_bytes(K); + if ((tmp = OPENSSL_malloc(tmp_len)) == NULL) goto err; + BN_bn2bin(K, tmp); + ret = s->method->ssl3_enc->generate_master_secret(s,master_key,tmp,tmp_len); +err: + if (tmp) + { + OPENSSL_cleanse(tmp,tmp_len) ; + OPENSSL_free(tmp); + } + BN_clear_free(K); + BN_clear_free(x); + if (passwd) + { + OPENSSL_cleanse(passwd,strlen(passwd)) ; + OPENSSL_free(passwd); + } + BN_clear_free(u); + return ret; + } + +int SRP_Calc_A_param(SSL *s) + { + unsigned char rnd[SSL_MAX_MASTER_KEY_LENGTH]; + + if (BN_num_bits(s->srp_ctx.N) < s->srp_ctx.strength) + return -1; + + if (s->srp_ctx.SRP_verify_param_callback ==NULL && + !SRP_check_known_gN_param(s->srp_ctx.g,s->srp_ctx.N)) + return -1 ; + + RAND_bytes(rnd, sizeof(rnd)); + s->srp_ctx.a = BN_bin2bn(rnd, sizeof(rnd), s->srp_ctx.a); + OPENSSL_cleanse(rnd, sizeof(rnd)); + + if (!(s->srp_ctx.A = SRP_Calc_A(s->srp_ctx.a,s->srp_ctx.N,s->srp_ctx.g))) + return -1; + + /* We can have a callback to verify SRP param!! */ + if (s->srp_ctx.SRP_verify_param_callback !=NULL) + return s->srp_ctx.SRP_verify_param_callback(s,s->srp_ctx.SRP_cb_arg); + + return 1; + } + +BIGNUM *SSL_get_srp_g(SSL *s) + { + if (s->srp_ctx.g != NULL) + return s->srp_ctx.g; + return s->ctx->srp_ctx.g; + } + +BIGNUM *SSL_get_srp_N(SSL *s) + { + if (s->srp_ctx.N != NULL) + return s->srp_ctx.N; + return s->ctx->srp_ctx.N; + } + +char *SSL_get_srp_username(SSL *s) + { + if (s->srp_ctx.login != NULL) + return s->srp_ctx.login; + return s->ctx->srp_ctx.login; + } + +char *SSL_get_srp_userinfo(SSL *s) + { + if (s->srp_ctx.info != NULL) + return s->srp_ctx.info; + return s->ctx->srp_ctx.info; + } + +#define tls1_ctx_ctrl ssl3_ctx_ctrl +#define tls1_ctx_callback_ctrl ssl3_ctx_callback_ctrl + +int SSL_CTX_set_srp_username(SSL_CTX *ctx,char *name) + { + return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_USERNAME,0,name); + } + +int SSL_CTX_set_srp_password(SSL_CTX *ctx,char *password) + { + return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD,0,password); + } + +int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength) + { + return tls1_ctx_ctrl(ctx, SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH, strength, + NULL); + } + +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, int (*cb)(SSL *,void *)) + { + return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_SRP_VERIFY_PARAM_CB, + (void (*)(void))cb); + } + +int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg) + { + return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_SRP_ARG,0,arg); + } + +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb)(SSL *,int *,void *)) + { + return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB, + (void (*)(void))cb); + } + +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, char *(*cb)(SSL *,void *)) + { + return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB, + (void (*)(void))cb); + } + +#endif -- cgit v1.2.3