path: root/app/openssl/crypto/sha/asm
author    Parménides GV <parmegv@sdf.org>  2015-06-16 11:28:05 +0200
committer Parménides GV <parmegv@sdf.org>  2015-06-16 11:28:05 +0200
commit    66c79953db6876ec17a7ebf50dc4fd07d24fae37 (patch)
tree      67075abe6ea28f7fc7213f654b86464b13507058 /app/openssl/crypto/sha/asm
parent    1f41fec6765e49838141ad29151713c7ac3dd17c (diff)
parent    e533cf6939e3ea4233aa8a82812f8ce5fcb565ca (diff)
Merge branch 'develop' (tag: 0.9.4)
Diffstat (limited to 'app/openssl/crypto/sha/asm')
-rw-r--r--  app/openssl/crypto/sha/asm/README | 1
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-586.S | 2639
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-586.pl | 1229
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-alpha.pl | 322
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-armv4-large.S | 1450
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-armv4-large.pl | 678
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-armv8.S | 1211
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-armv8.pl | 333
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-ia64.pl | 305
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-mips.S | 1664
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-mips.pl | 354
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-parisc.pl | 260
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha1-ppc.pl | 326
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-s390x.pl | 246
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-sparcv9.pl | 284
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-sparcv9a.pl | 601
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-thumb.pl | 259
-rw-r--r--  app/openssl/crypto/sha/asm/sha1-x86_64.S | 2486
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha1-x86_64.pl | 1261
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-586.S | 258
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-586.pl | 249
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-armv4.S | 2690
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-armv4.pl | 656
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-armv8.S | 1141
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-mips.S | 1998
-rw-r--r--  app/openssl/crypto/sha/asm/sha256-x86_64.S | 1778
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-586.S | 836
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-586.pl | 644
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-armv4.S | 1783
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-armv4.pl | 583
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-armv8.S | 1021
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-armv8.pl | 414
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha512-ia64.pl | 672
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-mips.pl | 455
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha512-parisc.pl | 793
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha512-ppc.pl | 460
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-s390x.pl | 322
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-sparcv9.pl | 594
-rw-r--r--  app/openssl/crypto/sha/asm/sha512-x86_64.S | 1802
-rwxr-xr-x  app/openssl/crypto/sha/asm/sha512-x86_64.pl | 451
40 files changed, 0 insertions(+), 35509 deletions(-)
diff --git a/app/openssl/crypto/sha/asm/README b/app/openssl/crypto/sha/asm/README
deleted file mode 100644
index b7e75576..00000000
--- a/app/openssl/crypto/sha/asm/README
+++ /dev/null
@@ -1 +0,0 @@
-C2.pl works
diff --git a/app/openssl/crypto/sha/asm/sha1-586.S b/app/openssl/crypto/sha/asm/sha1-586.S
deleted file mode 100644
index 47bef2a9..00000000
--- a/app/openssl/crypto/sha/asm/sha1-586.S
+++ /dev/null
@@ -1,2639 +0,0 @@
-.file "sha1-586.s"
-.text
-.globl sha1_block_data_order
-.type sha1_block_data_order,@function
-.align 16
-sha1_block_data_order:
-.L_sha1_block_data_order_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- call .L000pic_point
-.L000pic_point:
- popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L000pic_point](%ebp),%esi
- movl OPENSSL_ia32cap_P@GOT(%esi),%esi
- leal .LK_XX_XX-.L000pic_point(%ebp),%ebp
- movl (%esi),%eax
- movl 4(%esi),%edx
- testl $512,%edx
- jz .L001x86
- testl $16777216,%eax
- jz .L001x86
- jmp .Lssse3_shortcut
-.align 16
-.L001x86:
- movl 20(%esp),%ebp
- movl 24(%esp),%esi
- movl 28(%esp),%eax
- subl $76,%esp
- shll $6,%eax
- addl %esi,%eax
- movl %eax,104(%esp)
- movl 16(%ebp),%edi
- jmp .L002loop
-.align 16
-.L002loop:
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- movl %ecx,8(%esp)
- movl %edx,12(%esp)
- movl 16(%esi),%eax
- movl 20(%esi),%ebx
- movl 24(%esi),%ecx
- movl 28(%esi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax,16(%esp)
- movl %ebx,20(%esp)
- movl %ecx,24(%esp)
- movl %edx,28(%esp)
- movl 32(%esi),%eax
- movl 36(%esi),%ebx
- movl 40(%esi),%ecx
- movl 44(%esi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax,32(%esp)
- movl %ebx,36(%esp)
- movl %ecx,40(%esp)
- movl %edx,44(%esp)
- movl 48(%esi),%eax
- movl 52(%esi),%ebx
- movl 56(%esi),%ecx
- movl 60(%esi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax,48(%esp)
- movl %ebx,52(%esp)
- movl %ecx,56(%esp)
- movl %edx,60(%esp)
- movl %esi,100(%esp)
- movl (%ebp),%eax
- movl 4(%ebp),%ebx
- movl 8(%ebp),%ecx
- movl 12(%ebp),%edx
-
- movl %ecx,%esi
- movl %eax,%ebp
- roll $5,%ebp
- xorl %edx,%esi
- addl %edi,%ebp
- movl (%esp),%edi
- andl %ebx,%esi
- rorl $2,%ebx
- xorl %edx,%esi
- leal 1518500249(%ebp,%edi,1),%ebp
- addl %esi,%ebp
-
- movl %ebx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ecx,%edi
- addl %edx,%ebp
- movl 4(%esp),%edx
- andl %eax,%edi
- rorl $2,%eax
- xorl %ecx,%edi
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %edi,%ebp
-
- movl %eax,%edx
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%edx
- addl %ecx,%ebp
- movl 8(%esp),%ecx
- andl %esi,%edx
- rorl $2,%esi
- xorl %ebx,%edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- addl %edx,%ebp
-
- movl %esi,%ecx
- movl %ebp,%edx
- roll $5,%ebp
- xorl %eax,%ecx
- addl %ebx,%ebp
- movl 12(%esp),%ebx
- andl %edi,%ecx
- rorl $2,%edi
- xorl %eax,%ecx
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ecx,%ebp
-
- movl %edi,%ebx
- movl %ebp,%ecx
- roll $5,%ebp
- xorl %esi,%ebx
- addl %eax,%ebp
- movl 16(%esp),%eax
- andl %edx,%ebx
- rorl $2,%edx
- xorl %esi,%ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- addl %ebx,%ebp
-
- movl %edx,%eax
- movl %ebp,%ebx
- roll $5,%ebp
- xorl %edi,%eax
- addl %esi,%ebp
- movl 20(%esp),%esi
- andl %ecx,%eax
- rorl $2,%ecx
- xorl %edi,%eax
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %eax,%ebp
-
- movl %ecx,%esi
- movl %ebp,%eax
- roll $5,%ebp
- xorl %edx,%esi
- addl %edi,%ebp
- movl 24(%esp),%edi
- andl %ebx,%esi
- rorl $2,%ebx
- xorl %edx,%esi
- leal 1518500249(%ebp,%edi,1),%ebp
- addl %esi,%ebp
-
- movl %ebx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ecx,%edi
- addl %edx,%ebp
- movl 28(%esp),%edx
- andl %eax,%edi
- rorl $2,%eax
- xorl %ecx,%edi
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %edi,%ebp
-
- movl %eax,%edx
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%edx
- addl %ecx,%ebp
- movl 32(%esp),%ecx
- andl %esi,%edx
- rorl $2,%esi
- xorl %ebx,%edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- addl %edx,%ebp
-
- movl %esi,%ecx
- movl %ebp,%edx
- roll $5,%ebp
- xorl %eax,%ecx
- addl %ebx,%ebp
- movl 36(%esp),%ebx
- andl %edi,%ecx
- rorl $2,%edi
- xorl %eax,%ecx
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ecx,%ebp
-
- movl %edi,%ebx
- movl %ebp,%ecx
- roll $5,%ebp
- xorl %esi,%ebx
- addl %eax,%ebp
- movl 40(%esp),%eax
- andl %edx,%ebx
- rorl $2,%edx
- xorl %esi,%ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- addl %ebx,%ebp
-
- movl %edx,%eax
- movl %ebp,%ebx
- roll $5,%ebp
- xorl %edi,%eax
- addl %esi,%ebp
- movl 44(%esp),%esi
- andl %ecx,%eax
- rorl $2,%ecx
- xorl %edi,%eax
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %eax,%ebp
-
- movl %ecx,%esi
- movl %ebp,%eax
- roll $5,%ebp
- xorl %edx,%esi
- addl %edi,%ebp
- movl 48(%esp),%edi
- andl %ebx,%esi
- rorl $2,%ebx
- xorl %edx,%esi
- leal 1518500249(%ebp,%edi,1),%ebp
- addl %esi,%ebp
-
- movl %ebx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ecx,%edi
- addl %edx,%ebp
- movl 52(%esp),%edx
- andl %eax,%edi
- rorl $2,%eax
- xorl %ecx,%edi
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %edi,%ebp
-
- movl %eax,%edx
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%edx
- addl %ecx,%ebp
- movl 56(%esp),%ecx
- andl %esi,%edx
- rorl $2,%esi
- xorl %ebx,%edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- addl %edx,%ebp
-
- movl %esi,%ecx
- movl %ebp,%edx
- roll $5,%ebp
- xorl %eax,%ecx
- addl %ebx,%ebp
- movl 60(%esp),%ebx
- andl %edi,%ecx
- rorl $2,%edi
- xorl %eax,%ecx
- leal 1518500249(%ebp,%ebx,1),%ebp
- movl (%esp),%ebx
- addl %ebp,%ecx
-
- movl %edi,%ebp
- xorl 8(%esp),%ebx
- xorl %esi,%ebp
- xorl 32(%esp),%ebx
- andl %edx,%ebp
- xorl 52(%esp),%ebx
- roll $1,%ebx
- xorl %esi,%ebp
- addl %ebp,%eax
- movl %ecx,%ebp
- rorl $2,%edx
- movl %ebx,(%esp)
- roll $5,%ebp
- leal 1518500249(%ebx,%eax,1),%ebx
- movl 4(%esp),%eax
- addl %ebp,%ebx
-
- movl %edx,%ebp
- xorl 12(%esp),%eax
- xorl %edi,%ebp
- xorl 36(%esp),%eax
- andl %ecx,%ebp
- xorl 56(%esp),%eax
- roll $1,%eax
- xorl %edi,%ebp
- addl %ebp,%esi
- movl %ebx,%ebp
- rorl $2,%ecx
- movl %eax,4(%esp)
- roll $5,%ebp
- leal 1518500249(%eax,%esi,1),%eax
- movl 8(%esp),%esi
- addl %ebp,%eax
-
- movl %ecx,%ebp
- xorl 16(%esp),%esi
- xorl %edx,%ebp
- xorl 40(%esp),%esi
- andl %ebx,%ebp
- xorl 60(%esp),%esi
- roll $1,%esi
- xorl %edx,%ebp
- addl %ebp,%edi
- movl %eax,%ebp
- rorl $2,%ebx
- movl %esi,8(%esp)
- roll $5,%ebp
- leal 1518500249(%esi,%edi,1),%esi
- movl 12(%esp),%edi
- addl %ebp,%esi
-
- movl %ebx,%ebp
- xorl 20(%esp),%edi
- xorl %ecx,%ebp
- xorl 44(%esp),%edi
- andl %eax,%ebp
- xorl (%esp),%edi
- roll $1,%edi
- xorl %ecx,%ebp
- addl %ebp,%edx
- movl %esi,%ebp
- rorl $2,%eax
- movl %edi,12(%esp)
- roll $5,%ebp
- leal 1518500249(%edi,%edx,1),%edi
- movl 16(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 24(%esp),%edx
- xorl %eax,%ebp
- xorl 48(%esp),%edx
- xorl %ebx,%ebp
- xorl 4(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,16(%esp)
- leal 1859775393(%edx,%ecx,1),%edx
- movl 20(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 28(%esp),%ecx
- xorl %esi,%ebp
- xorl 52(%esp),%ecx
- xorl %eax,%ebp
- xorl 8(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,20(%esp)
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl 24(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 32(%esp),%ebx
- xorl %edi,%ebp
- xorl 56(%esp),%ebx
- xorl %esi,%ebp
- xorl 12(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,24(%esp)
- leal 1859775393(%ebx,%eax,1),%ebx
- movl 28(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 36(%esp),%eax
- xorl %edx,%ebp
- xorl 60(%esp),%eax
- xorl %edi,%ebp
- xorl 16(%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- movl %eax,28(%esp)
- leal 1859775393(%eax,%esi,1),%eax
- movl 32(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl 40(%esp),%esi
- xorl %ecx,%ebp
- xorl (%esp),%esi
- xorl %edx,%ebp
- xorl 20(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,32(%esp)
- leal 1859775393(%esi,%edi,1),%esi
- movl 36(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 44(%esp),%edi
- xorl %ebx,%ebp
- xorl 4(%esp),%edi
- xorl %ecx,%ebp
- xorl 24(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,36(%esp)
- leal 1859775393(%edi,%edx,1),%edi
- movl 40(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 48(%esp),%edx
- xorl %eax,%ebp
- xorl 8(%esp),%edx
- xorl %ebx,%ebp
- xorl 28(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,40(%esp)
- leal 1859775393(%edx,%ecx,1),%edx
- movl 44(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 52(%esp),%ecx
- xorl %esi,%ebp
- xorl 12(%esp),%ecx
- xorl %eax,%ebp
- xorl 32(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,44(%esp)
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl 48(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 56(%esp),%ebx
- xorl %edi,%ebp
- xorl 16(%esp),%ebx
- xorl %esi,%ebp
- xorl 36(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,48(%esp)
- leal 1859775393(%ebx,%eax,1),%ebx
- movl 52(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 60(%esp),%eax
- xorl %edx,%ebp
- xorl 20(%esp),%eax
- xorl %edi,%ebp
- xorl 40(%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- movl %eax,52(%esp)
- leal 1859775393(%eax,%esi,1),%eax
- movl 56(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl (%esp),%esi
- xorl %ecx,%ebp
- xorl 24(%esp),%esi
- xorl %edx,%ebp
- xorl 44(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,56(%esp)
- leal 1859775393(%esi,%edi,1),%esi
- movl 60(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 4(%esp),%edi
- xorl %ebx,%ebp
- xorl 28(%esp),%edi
- xorl %ecx,%ebp
- xorl 48(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,60(%esp)
- leal 1859775393(%edi,%edx,1),%edi
- movl (%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 8(%esp),%edx
- xorl %eax,%ebp
- xorl 32(%esp),%edx
- xorl %ebx,%ebp
- xorl 52(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,(%esp)
- leal 1859775393(%edx,%ecx,1),%edx
- movl 4(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 12(%esp),%ecx
- xorl %esi,%ebp
- xorl 36(%esp),%ecx
- xorl %eax,%ebp
- xorl 56(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,4(%esp)
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl 8(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 16(%esp),%ebx
- xorl %edi,%ebp
- xorl 40(%esp),%ebx
- xorl %esi,%ebp
- xorl 60(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,8(%esp)
- leal 1859775393(%ebx,%eax,1),%ebx
- movl 12(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 20(%esp),%eax
- xorl %edx,%ebp
- xorl 44(%esp),%eax
- xorl %edi,%ebp
- xorl (%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- movl %eax,12(%esp)
- leal 1859775393(%eax,%esi,1),%eax
- movl 16(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl 24(%esp),%esi
- xorl %ecx,%ebp
- xorl 48(%esp),%esi
- xorl %edx,%ebp
- xorl 4(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,16(%esp)
- leal 1859775393(%esi,%edi,1),%esi
- movl 20(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 28(%esp),%edi
- xorl %ebx,%ebp
- xorl 52(%esp),%edi
- xorl %ecx,%ebp
- xorl 8(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,20(%esp)
- leal 1859775393(%edi,%edx,1),%edi
- movl 24(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 32(%esp),%edx
- xorl %eax,%ebp
- xorl 56(%esp),%edx
- xorl %ebx,%ebp
- xorl 12(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,24(%esp)
- leal 1859775393(%edx,%ecx,1),%edx
- movl 28(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 36(%esp),%ecx
- xorl %esi,%ebp
- xorl 60(%esp),%ecx
- xorl %eax,%ebp
- xorl 16(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,28(%esp)
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl 32(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edi,%ebp
- xorl 40(%esp),%ebx
- xorl %esi,%ebp
- xorl (%esp),%ebx
- andl %edx,%ebp
- xorl 20(%esp),%ebx
- roll $1,%ebx
- addl %eax,%ebp
- rorl $2,%edx
- movl %ecx,%eax
- roll $5,%eax
- movl %ebx,32(%esp)
- leal 2400959708(%ebx,%ebp,1),%ebx
- movl %edi,%ebp
- addl %eax,%ebx
- andl %esi,%ebp
- movl 36(%esp),%eax
- addl %ebp,%ebx
-
- movl %edx,%ebp
- xorl 44(%esp),%eax
- xorl %edi,%ebp
- xorl 4(%esp),%eax
- andl %ecx,%ebp
- xorl 24(%esp),%eax
- roll $1,%eax
- addl %esi,%ebp
- rorl $2,%ecx
- movl %ebx,%esi
- roll $5,%esi
- movl %eax,36(%esp)
- leal 2400959708(%eax,%ebp,1),%eax
- movl %edx,%ebp
- addl %esi,%eax
- andl %edi,%ebp
- movl 40(%esp),%esi
- addl %ebp,%eax
-
- movl %ecx,%ebp
- xorl 48(%esp),%esi
- xorl %edx,%ebp
- xorl 8(%esp),%esi
- andl %ebx,%ebp
- xorl 28(%esp),%esi
- roll $1,%esi
- addl %edi,%ebp
- rorl $2,%ebx
- movl %eax,%edi
- roll $5,%edi
- movl %esi,40(%esp)
- leal 2400959708(%esi,%ebp,1),%esi
- movl %ecx,%ebp
- addl %edi,%esi
- andl %edx,%ebp
- movl 44(%esp),%edi
- addl %ebp,%esi
-
- movl %ebx,%ebp
- xorl 52(%esp),%edi
- xorl %ecx,%ebp
- xorl 12(%esp),%edi
- andl %eax,%ebp
- xorl 32(%esp),%edi
- roll $1,%edi
- addl %edx,%ebp
- rorl $2,%eax
- movl %esi,%edx
- roll $5,%edx
- movl %edi,44(%esp)
- leal 2400959708(%edi,%ebp,1),%edi
- movl %ebx,%ebp
- addl %edx,%edi
- andl %ecx,%ebp
- movl 48(%esp),%edx
- addl %ebp,%edi
-
- movl %eax,%ebp
- xorl 56(%esp),%edx
- xorl %ebx,%ebp
- xorl 16(%esp),%edx
- andl %esi,%ebp
- xorl 36(%esp),%edx
- roll $1,%edx
- addl %ecx,%ebp
- rorl $2,%esi
- movl %edi,%ecx
- roll $5,%ecx
- movl %edx,48(%esp)
- leal 2400959708(%edx,%ebp,1),%edx
- movl %eax,%ebp
- addl %ecx,%edx
- andl %ebx,%ebp
- movl 52(%esp),%ecx
- addl %ebp,%edx
-
- movl %esi,%ebp
- xorl 60(%esp),%ecx
- xorl %eax,%ebp
- xorl 20(%esp),%ecx
- andl %edi,%ebp
- xorl 40(%esp),%ecx
- roll $1,%ecx
- addl %ebx,%ebp
- rorl $2,%edi
- movl %edx,%ebx
- roll $5,%ebx
- movl %ecx,52(%esp)
- leal 2400959708(%ecx,%ebp,1),%ecx
- movl %esi,%ebp
- addl %ebx,%ecx
- andl %eax,%ebp
- movl 56(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edi,%ebp
- xorl (%esp),%ebx
- xorl %esi,%ebp
- xorl 24(%esp),%ebx
- andl %edx,%ebp
- xorl 44(%esp),%ebx
- roll $1,%ebx
- addl %eax,%ebp
- rorl $2,%edx
- movl %ecx,%eax
- roll $5,%eax
- movl %ebx,56(%esp)
- leal 2400959708(%ebx,%ebp,1),%ebx
- movl %edi,%ebp
- addl %eax,%ebx
- andl %esi,%ebp
- movl 60(%esp),%eax
- addl %ebp,%ebx
-
- movl %edx,%ebp
- xorl 4(%esp),%eax
- xorl %edi,%ebp
- xorl 28(%esp),%eax
- andl %ecx,%ebp
- xorl 48(%esp),%eax
- roll $1,%eax
- addl %esi,%ebp
- rorl $2,%ecx
- movl %ebx,%esi
- roll $5,%esi
- movl %eax,60(%esp)
- leal 2400959708(%eax,%ebp,1),%eax
- movl %edx,%ebp
- addl %esi,%eax
- andl %edi,%ebp
- movl (%esp),%esi
- addl %ebp,%eax
-
- movl %ecx,%ebp
- xorl 8(%esp),%esi
- xorl %edx,%ebp
- xorl 32(%esp),%esi
- andl %ebx,%ebp
- xorl 52(%esp),%esi
- roll $1,%esi
- addl %edi,%ebp
- rorl $2,%ebx
- movl %eax,%edi
- roll $5,%edi
- movl %esi,(%esp)
- leal 2400959708(%esi,%ebp,1),%esi
- movl %ecx,%ebp
- addl %edi,%esi
- andl %edx,%ebp
- movl 4(%esp),%edi
- addl %ebp,%esi
-
- movl %ebx,%ebp
- xorl 12(%esp),%edi
- xorl %ecx,%ebp
- xorl 36(%esp),%edi
- andl %eax,%ebp
- xorl 56(%esp),%edi
- roll $1,%edi
- addl %edx,%ebp
- rorl $2,%eax
- movl %esi,%edx
- roll $5,%edx
- movl %edi,4(%esp)
- leal 2400959708(%edi,%ebp,1),%edi
- movl %ebx,%ebp
- addl %edx,%edi
- andl %ecx,%ebp
- movl 8(%esp),%edx
- addl %ebp,%edi
-
- movl %eax,%ebp
- xorl 16(%esp),%edx
- xorl %ebx,%ebp
- xorl 40(%esp),%edx
- andl %esi,%ebp
- xorl 60(%esp),%edx
- roll $1,%edx
- addl %ecx,%ebp
- rorl $2,%esi
- movl %edi,%ecx
- roll $5,%ecx
- movl %edx,8(%esp)
- leal 2400959708(%edx,%ebp,1),%edx
- movl %eax,%ebp
- addl %ecx,%edx
- andl %ebx,%ebp
- movl 12(%esp),%ecx
- addl %ebp,%edx
-
- movl %esi,%ebp
- xorl 20(%esp),%ecx
- xorl %eax,%ebp
- xorl 44(%esp),%ecx
- andl %edi,%ebp
- xorl (%esp),%ecx
- roll $1,%ecx
- addl %ebx,%ebp
- rorl $2,%edi
- movl %edx,%ebx
- roll $5,%ebx
- movl %ecx,12(%esp)
- leal 2400959708(%ecx,%ebp,1),%ecx
- movl %esi,%ebp
- addl %ebx,%ecx
- andl %eax,%ebp
- movl 16(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edi,%ebp
- xorl 24(%esp),%ebx
- xorl %esi,%ebp
- xorl 48(%esp),%ebx
- andl %edx,%ebp
- xorl 4(%esp),%ebx
- roll $1,%ebx
- addl %eax,%ebp
- rorl $2,%edx
- movl %ecx,%eax
- roll $5,%eax
- movl %ebx,16(%esp)
- leal 2400959708(%ebx,%ebp,1),%ebx
- movl %edi,%ebp
- addl %eax,%ebx
- andl %esi,%ebp
- movl 20(%esp),%eax
- addl %ebp,%ebx
-
- movl %edx,%ebp
- xorl 28(%esp),%eax
- xorl %edi,%ebp
- xorl 52(%esp),%eax
- andl %ecx,%ebp
- xorl 8(%esp),%eax
- roll $1,%eax
- addl %esi,%ebp
- rorl $2,%ecx
- movl %ebx,%esi
- roll $5,%esi
- movl %eax,20(%esp)
- leal 2400959708(%eax,%ebp,1),%eax
- movl %edx,%ebp
- addl %esi,%eax
- andl %edi,%ebp
- movl 24(%esp),%esi
- addl %ebp,%eax
-
- movl %ecx,%ebp
- xorl 32(%esp),%esi
- xorl %edx,%ebp
- xorl 56(%esp),%esi
- andl %ebx,%ebp
- xorl 12(%esp),%esi
- roll $1,%esi
- addl %edi,%ebp
- rorl $2,%ebx
- movl %eax,%edi
- roll $5,%edi
- movl %esi,24(%esp)
- leal 2400959708(%esi,%ebp,1),%esi
- movl %ecx,%ebp
- addl %edi,%esi
- andl %edx,%ebp
- movl 28(%esp),%edi
- addl %ebp,%esi
-
- movl %ebx,%ebp
- xorl 36(%esp),%edi
- xorl %ecx,%ebp
- xorl 60(%esp),%edi
- andl %eax,%ebp
- xorl 16(%esp),%edi
- roll $1,%edi
- addl %edx,%ebp
- rorl $2,%eax
- movl %esi,%edx
- roll $5,%edx
- movl %edi,28(%esp)
- leal 2400959708(%edi,%ebp,1),%edi
- movl %ebx,%ebp
- addl %edx,%edi
- andl %ecx,%ebp
- movl 32(%esp),%edx
- addl %ebp,%edi
-
- movl %eax,%ebp
- xorl 40(%esp),%edx
- xorl %ebx,%ebp
- xorl (%esp),%edx
- andl %esi,%ebp
- xorl 20(%esp),%edx
- roll $1,%edx
- addl %ecx,%ebp
- rorl $2,%esi
- movl %edi,%ecx
- roll $5,%ecx
- movl %edx,32(%esp)
- leal 2400959708(%edx,%ebp,1),%edx
- movl %eax,%ebp
- addl %ecx,%edx
- andl %ebx,%ebp
- movl 36(%esp),%ecx
- addl %ebp,%edx
-
- movl %esi,%ebp
- xorl 44(%esp),%ecx
- xorl %eax,%ebp
- xorl 4(%esp),%ecx
- andl %edi,%ebp
- xorl 24(%esp),%ecx
- roll $1,%ecx
- addl %ebx,%ebp
- rorl $2,%edi
- movl %edx,%ebx
- roll $5,%ebx
- movl %ecx,36(%esp)
- leal 2400959708(%ecx,%ebp,1),%ecx
- movl %esi,%ebp
- addl %ebx,%ecx
- andl %eax,%ebp
- movl 40(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edi,%ebp
- xorl 48(%esp),%ebx
- xorl %esi,%ebp
- xorl 8(%esp),%ebx
- andl %edx,%ebp
- xorl 28(%esp),%ebx
- roll $1,%ebx
- addl %eax,%ebp
- rorl $2,%edx
- movl %ecx,%eax
- roll $5,%eax
- movl %ebx,40(%esp)
- leal 2400959708(%ebx,%ebp,1),%ebx
- movl %edi,%ebp
- addl %eax,%ebx
- andl %esi,%ebp
- movl 44(%esp),%eax
- addl %ebp,%ebx
-
- movl %edx,%ebp
- xorl 52(%esp),%eax
- xorl %edi,%ebp
- xorl 12(%esp),%eax
- andl %ecx,%ebp
- xorl 32(%esp),%eax
- roll $1,%eax
- addl %esi,%ebp
- rorl $2,%ecx
- movl %ebx,%esi
- roll $5,%esi
- movl %eax,44(%esp)
- leal 2400959708(%eax,%ebp,1),%eax
- movl %edx,%ebp
- addl %esi,%eax
- andl %edi,%ebp
- movl 48(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl 56(%esp),%esi
- xorl %ecx,%ebp
- xorl 16(%esp),%esi
- xorl %edx,%ebp
- xorl 36(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,48(%esp)
- leal 3395469782(%esi,%edi,1),%esi
- movl 52(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 60(%esp),%edi
- xorl %ebx,%ebp
- xorl 20(%esp),%edi
- xorl %ecx,%ebp
- xorl 40(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,52(%esp)
- leal 3395469782(%edi,%edx,1),%edi
- movl 56(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl (%esp),%edx
- xorl %eax,%ebp
- xorl 24(%esp),%edx
- xorl %ebx,%ebp
- xorl 44(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,56(%esp)
- leal 3395469782(%edx,%ecx,1),%edx
- movl 60(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 4(%esp),%ecx
- xorl %esi,%ebp
- xorl 28(%esp),%ecx
- xorl %eax,%ebp
- xorl 48(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,60(%esp)
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl (%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 8(%esp),%ebx
- xorl %edi,%ebp
- xorl 32(%esp),%ebx
- xorl %esi,%ebp
- xorl 52(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,(%esp)
- leal 3395469782(%ebx,%eax,1),%ebx
- movl 4(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 12(%esp),%eax
- xorl %edx,%ebp
- xorl 36(%esp),%eax
- xorl %edi,%ebp
- xorl 56(%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- movl %eax,4(%esp)
- leal 3395469782(%eax,%esi,1),%eax
- movl 8(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl 16(%esp),%esi
- xorl %ecx,%ebp
- xorl 40(%esp),%esi
- xorl %edx,%ebp
- xorl 60(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,8(%esp)
- leal 3395469782(%esi,%edi,1),%esi
- movl 12(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 20(%esp),%edi
- xorl %ebx,%ebp
- xorl 44(%esp),%edi
- xorl %ecx,%ebp
- xorl (%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,12(%esp)
- leal 3395469782(%edi,%edx,1),%edi
- movl 16(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 24(%esp),%edx
- xorl %eax,%ebp
- xorl 48(%esp),%edx
- xorl %ebx,%ebp
- xorl 4(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,16(%esp)
- leal 3395469782(%edx,%ecx,1),%edx
- movl 20(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 28(%esp),%ecx
- xorl %esi,%ebp
- xorl 52(%esp),%ecx
- xorl %eax,%ebp
- xorl 8(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,20(%esp)
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl 24(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 32(%esp),%ebx
- xorl %edi,%ebp
- xorl 56(%esp),%ebx
- xorl %esi,%ebp
- xorl 12(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,24(%esp)
- leal 3395469782(%ebx,%eax,1),%ebx
- movl 28(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 36(%esp),%eax
- xorl %edx,%ebp
- xorl 60(%esp),%eax
- xorl %edi,%ebp
- xorl 16(%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- movl %eax,28(%esp)
- leal 3395469782(%eax,%esi,1),%eax
- movl 32(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl 40(%esp),%esi
- xorl %ecx,%ebp
- xorl (%esp),%esi
- xorl %edx,%ebp
- xorl 20(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- movl %esi,32(%esp)
- leal 3395469782(%esi,%edi,1),%esi
- movl 36(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 44(%esp),%edi
- xorl %ebx,%ebp
- xorl 4(%esp),%edi
- xorl %ecx,%ebp
- xorl 24(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- movl %edi,36(%esp)
- leal 3395469782(%edi,%edx,1),%edi
- movl 40(%esp),%edx
- addl %ebp,%edi
-
- movl %esi,%ebp
- xorl 48(%esp),%edx
- xorl %eax,%ebp
- xorl 8(%esp),%edx
- xorl %ebx,%ebp
- xorl 28(%esp),%edx
- roll $1,%edx
- addl %ebp,%ecx
- rorl $2,%esi
- movl %edi,%ebp
- roll $5,%ebp
- movl %edx,40(%esp)
- leal 3395469782(%edx,%ecx,1),%edx
- movl 44(%esp),%ecx
- addl %ebp,%edx
-
- movl %edi,%ebp
- xorl 52(%esp),%ecx
- xorl %esi,%ebp
- xorl 12(%esp),%ecx
- xorl %eax,%ebp
- xorl 32(%esp),%ecx
- roll $1,%ecx
- addl %ebp,%ebx
- rorl $2,%edi
- movl %edx,%ebp
- roll $5,%ebp
- movl %ecx,44(%esp)
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl 48(%esp),%ebx
- addl %ebp,%ecx
-
- movl %edx,%ebp
- xorl 56(%esp),%ebx
- xorl %edi,%ebp
- xorl 16(%esp),%ebx
- xorl %esi,%ebp
- xorl 36(%esp),%ebx
- roll $1,%ebx
- addl %ebp,%eax
- rorl $2,%edx
- movl %ecx,%ebp
- roll $5,%ebp
- movl %ebx,48(%esp)
- leal 3395469782(%ebx,%eax,1),%ebx
- movl 52(%esp),%eax
- addl %ebp,%ebx
-
- movl %ecx,%ebp
- xorl 60(%esp),%eax
- xorl %edx,%ebp
- xorl 20(%esp),%eax
- xorl %edi,%ebp
- xorl 40(%esp),%eax
- roll $1,%eax
- addl %ebp,%esi
- rorl $2,%ecx
- movl %ebx,%ebp
- roll $5,%ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl 56(%esp),%esi
- addl %ebp,%eax
-
- movl %ebx,%ebp
- xorl (%esp),%esi
- xorl %ecx,%ebp
- xorl 24(%esp),%esi
- xorl %edx,%ebp
- xorl 44(%esp),%esi
- roll $1,%esi
- addl %ebp,%edi
- rorl $2,%ebx
- movl %eax,%ebp
- roll $5,%ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl 60(%esp),%edi
- addl %ebp,%esi
-
- movl %eax,%ebp
- xorl 4(%esp),%edi
- xorl %ebx,%ebp
- xorl 28(%esp),%edi
- xorl %ecx,%ebp
- xorl 48(%esp),%edi
- roll $1,%edi
- addl %ebp,%edx
- rorl $2,%eax
- movl %esi,%ebp
- roll $5,%ebp
- leal 3395469782(%edi,%edx,1),%edi
- addl %ebp,%edi
- movl 96(%esp),%ebp
- movl 100(%esp),%edx
- addl (%ebp),%edi
- addl 4(%ebp),%esi
- addl 8(%ebp),%eax
- addl 12(%ebp),%ebx
- addl 16(%ebp),%ecx
- movl %edi,(%ebp)
- addl $64,%edx
- movl %esi,4(%ebp)
- cmpl 104(%esp),%edx
- movl %eax,8(%ebp)
- movl %ecx,%edi
- movl %ebx,12(%ebp)
- movl %edx,%esi
- movl %ecx,16(%ebp)
- jb .L002loop
- addl $76,%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.type _sha1_block_data_order_ssse3,@function
-.align 16
-_sha1_block_data_order_ssse3:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- call .L003pic_point
-.L003pic_point:
- popl %ebp
- leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
-.Lssse3_shortcut:
- movdqa (%ebp),%xmm7
- movdqa 16(%ebp),%xmm0
- movdqa 32(%ebp),%xmm1
- movdqa 48(%ebp),%xmm2
- movdqa 64(%ebp),%xmm6
- movl 20(%esp),%edi
- movl 24(%esp),%ebp
- movl 28(%esp),%edx
- movl %esp,%esi
- subl $208,%esp
- andl $-64,%esp
- movdqa %xmm0,112(%esp)
- movdqa %xmm1,128(%esp)
- movdqa %xmm2,144(%esp)
- shll $6,%edx
- movdqa %xmm7,160(%esp)
- addl %ebp,%edx
- movdqa %xmm6,176(%esp)
- addl $64,%ebp
- movl %edi,192(%esp)
- movl %ebp,196(%esp)
- movl %edx,200(%esp)
- movl %esi,204(%esp)
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- movl 12(%edi),%edx
- movl 16(%edi),%edi
- movl %ebx,%esi
- movdqu -64(%ebp),%xmm0
- movdqu -48(%ebp),%xmm1
- movdqu -32(%ebp),%xmm2
- movdqu -16(%ebp),%xmm3
-.byte 102,15,56,0,198
-.byte 102,15,56,0,206
-.byte 102,15,56,0,214
- movdqa %xmm7,96(%esp)
-.byte 102,15,56,0,222
- paddd %xmm7,%xmm0
- paddd %xmm7,%xmm1
- paddd %xmm7,%xmm2
- movdqa %xmm0,(%esp)
- psubd %xmm7,%xmm0
- movdqa %xmm1,16(%esp)
- psubd %xmm7,%xmm1
- movdqa %xmm2,32(%esp)
- psubd %xmm7,%xmm2
- movdqa %xmm1,%xmm4
- jmp .L004loop
-.align 16
-.L004loop:
- addl (%esp),%edi
- xorl %edx,%ecx
-.byte 102,15,58,15,224,8
- movdqa %xmm3,%xmm6
- movl %eax,%ebp
- roll $5,%eax
- paddd %xmm3,%xmm7
- movdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
- psrldq $4,%xmm6
- xorl %edx,%esi
- addl %eax,%edi
- pxor %xmm0,%xmm4
- rorl $2,%ebx
- addl %esi,%edi
- pxor %xmm2,%xmm6
- addl 4(%esp),%edx
- xorl %ecx,%ebx
- movl %edi,%esi
- roll $5,%edi
- pxor %xmm6,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
- movdqa %xmm7,48(%esp)
- xorl %ecx,%ebp
- addl %edi,%edx
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm6
- rorl $7,%eax
- addl %ebp,%edx
- addl 8(%esp),%ecx
- xorl %ebx,%eax
- pslldq $12,%xmm0
- paddd %xmm4,%xmm4
- movl %edx,%ebp
- roll $5,%edx
- andl %eax,%esi
- xorl %ebx,%eax
- psrld $31,%xmm6
- xorl %ebx,%esi
- addl %edx,%ecx
- movdqa %xmm0,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
- psrld $30,%xmm0
- por %xmm6,%xmm4
- addl 12(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
- roll $5,%ecx
- pslld $2,%xmm7
- pxor %xmm0,%xmm4
- andl %edi,%ebp
- xorl %eax,%edi
- movdqa 96(%esp),%xmm0
- xorl %eax,%ebp
- addl %ecx,%ebx
- pxor %xmm7,%xmm4
- movdqa %xmm2,%xmm5
- rorl $7,%edx
- addl %ebp,%ebx
- addl 16(%esp),%eax
- xorl %edi,%edx
-.byte 102,15,58,15,233,8
- movdqa %xmm4,%xmm7
- movl %ebx,%ebp
- roll $5,%ebx
- paddd %xmm4,%xmm0
- movdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
- psrldq $4,%xmm7
- xorl %edi,%esi
- addl %ebx,%eax
- pxor %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
- pxor %xmm3,%xmm7
- addl 20(%esp),%edi
- xorl %edx,%ecx
- movl %eax,%esi
- roll $5,%eax
- pxor %xmm7,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
- movdqa %xmm0,(%esp)
- xorl %edx,%ebp
- addl %eax,%edi
- movdqa %xmm5,%xmm1
- movdqa %xmm5,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- addl 24(%esp),%edx
- xorl %ecx,%ebx
- pslldq $12,%xmm1
- paddd %xmm5,%xmm5
- movl %edi,%ebp
- roll $5,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
- psrld $31,%xmm7
- xorl %ecx,%esi
- addl %edi,%edx
- movdqa %xmm1,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- psrld $30,%xmm1
- por %xmm7,%xmm5
- addl 28(%esp),%ecx
- xorl %ebx,%eax
- movl %edx,%esi
- roll $5,%edx
- pslld $2,%xmm0
- pxor %xmm1,%xmm5
- andl %eax,%ebp
- xorl %ebx,%eax
- movdqa 112(%esp),%xmm1
- xorl %ebx,%ebp
- addl %edx,%ecx
- pxor %xmm0,%xmm5
- movdqa %xmm3,%xmm6
- rorl $7,%edi
- addl %ebp,%ecx
- addl 32(%esp),%ebx
- xorl %eax,%edi
-.byte 102,15,58,15,242,8
- movdqa %xmm5,%xmm0
- movl %ecx,%ebp
- roll $5,%ecx
- paddd %xmm5,%xmm1
- movdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
- psrldq $4,%xmm0
- xorl %eax,%esi
- addl %ecx,%ebx
- pxor %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
- pxor %xmm4,%xmm0
- addl 36(%esp),%eax
- xorl %edi,%edx
- movl %ebx,%esi
- roll $5,%ebx
- pxor %xmm0,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
- movdqa %xmm1,16(%esp)
- xorl %edi,%ebp
- addl %ebx,%eax
- movdqa %xmm6,%xmm2
- movdqa %xmm6,%xmm0
- rorl $7,%ecx
- addl %ebp,%eax
- addl 40(%esp),%edi
- xorl %edx,%ecx
- pslldq $12,%xmm2
- paddd %xmm6,%xmm6
- movl %eax,%ebp
- roll $5,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
- psrld $31,%xmm0
- xorl %edx,%esi
- addl %eax,%edi
- movdqa %xmm2,%xmm1
- rorl $7,%ebx
- addl %esi,%edi
- psrld $30,%xmm2
- por %xmm0,%xmm6
- addl 44(%esp),%edx
- xorl %ecx,%ebx
- movdqa 64(%esp),%xmm0
- movl %edi,%esi
- roll $5,%edi
- pslld $2,%xmm1
- pxor %xmm2,%xmm6
- andl %ebx,%ebp
- xorl %ecx,%ebx
- movdqa 112(%esp),%xmm2
- xorl %ecx,%ebp
- addl %edi,%edx
- pxor %xmm1,%xmm6
- movdqa %xmm4,%xmm7
- rorl $7,%eax
- addl %ebp,%edx
- addl 48(%esp),%ecx
- xorl %ebx,%eax
-.byte 102,15,58,15,251,8
- movdqa %xmm6,%xmm1
- movl %edx,%ebp
- roll $5,%edx
- paddd %xmm6,%xmm2
- movdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
- psrldq $4,%xmm1
- xorl %ebx,%esi
- addl %edx,%ecx
- pxor %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
- pxor %xmm5,%xmm1
- addl 52(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
- roll $5,%ecx
- pxor %xmm1,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
- movdqa %xmm2,32(%esp)
- xorl %eax,%ebp
- addl %ecx,%ebx
- movdqa %xmm7,%xmm3
- movdqa %xmm7,%xmm1
- rorl $7,%edx
- addl %ebp,%ebx
- addl 56(%esp),%eax
- xorl %edi,%edx
- pslldq $12,%xmm3
- paddd %xmm7,%xmm7
- movl %ebx,%ebp
- roll $5,%ebx
- andl %edx,%esi
- xorl %edi,%edx
- psrld $31,%xmm1
- xorl %edi,%esi
- addl %ebx,%eax
- movdqa %xmm3,%xmm2
- rorl $7,%ecx
- addl %esi,%eax
- psrld $30,%xmm3
- por %xmm1,%xmm7
- addl 60(%esp),%edi
- xorl %edx,%ecx
- movdqa 80(%esp),%xmm1
- movl %eax,%esi
- roll $5,%eax
- pslld $2,%xmm2
- pxor %xmm3,%xmm7
- andl %ecx,%ebp
- xorl %edx,%ecx
- movdqa 112(%esp),%xmm3
- xorl %edx,%ebp
- addl %eax,%edi
- pxor %xmm2,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm7,%xmm2
- addl (%esp),%edx
- pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
- xorl %ecx,%ebx
- movl %edi,%ebp
- roll $5,%edi
- pxor %xmm1,%xmm0
- movdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
- movdqa %xmm3,%xmm4
- paddd %xmm7,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
- pxor %xmm2,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
- xorl %ebx,%eax
- movdqa %xmm0,%xmm2
- movdqa %xmm3,48(%esp)
- movl %edx,%esi
- roll $5,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
- pslld $2,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
- psrld $30,%xmm2
- rorl $7,%edi
- addl %ebp,%ecx
- addl 8(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%ebp
- roll $5,%ecx
- por %xmm2,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
- movdqa 96(%esp),%xmm2
- xorl %eax,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 12(%esp),%eax
- movdqa %xmm0,%xmm3
- xorl %edi,%edx
- movl %ebx,%esi
- roll $5,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- addl 16(%esp),%edi
- pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
- xorl %edx,%esi
- movl %eax,%ebp
- roll $5,%eax
- pxor %xmm2,%xmm1
- movdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
- movdqa %xmm4,%xmm5
- paddd %xmm0,%xmm4
- rorl $7,%ebx
- addl %esi,%edi
- pxor %xmm3,%xmm1
- addl 20(%esp),%edx
- xorl %ecx,%ebp
- movl %edi,%esi
- roll $5,%edi
- movdqa %xmm1,%xmm3
- movdqa %xmm4,(%esp)
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
- pslld $2,%xmm1
- addl 24(%esp),%ecx
- xorl %ebx,%esi
- psrld $30,%xmm3
- movl %edx,%ebp
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- por %xmm3,%xmm1
- addl 28(%esp),%ebx
- xorl %eax,%ebp
- movdqa 64(%esp),%xmm3
- movl %ecx,%esi
- roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- movdqa %xmm1,%xmm4
- addl %ebp,%ebx
- addl 32(%esp),%eax
- pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
- xorl %edi,%esi
- movl %ebx,%ebp
- roll $5,%ebx
- pxor %xmm3,%xmm2
- movdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
- movdqa 128(%esp),%xmm6
- paddd %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
- pxor %xmm4,%xmm2
- addl 36(%esp),%edi
- xorl %edx,%ebp
- movl %eax,%esi
- roll $5,%eax
- movdqa %xmm2,%xmm4
- movdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
- pslld $2,%xmm2
- addl 40(%esp),%edx
- xorl %ecx,%esi
- psrld $30,%xmm4
- movl %edi,%ebp
- roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- por %xmm4,%xmm2
- addl 44(%esp),%ecx
- xorl %ebx,%ebp
- movdqa 80(%esp),%xmm4
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- movdqa %xmm2,%xmm5
- addl %ebp,%ecx
- addl 48(%esp),%ebx
- pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %eax,%esi
- movl %ecx,%ebp
- roll $5,%ecx
- pxor %xmm4,%xmm3
- movdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
- movdqa %xmm6,%xmm7
- paddd %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
- pxor %xmm5,%xmm3
- addl 52(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- roll $5,%ebx
- movdqa %xmm3,%xmm5
- movdqa %xmm6,32(%esp)
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- pslld $2,%xmm3
- addl 56(%esp),%edi
- xorl %edx,%esi
- psrld $30,%xmm5
- movl %eax,%ebp
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
- por %xmm5,%xmm3
- addl 60(%esp),%edx
- xorl %ecx,%ebp
- movdqa 96(%esp),%xmm5
- movl %edi,%esi
- roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- movdqa %xmm3,%xmm6
- addl %ebp,%edx
- addl (%esp),%ecx
- pxor %xmm0,%xmm4
-.byte 102,15,58,15,242,8
- xorl %ebx,%esi
- movl %edx,%ebp
- roll $5,%edx
- pxor %xmm5,%xmm4
- movdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
- movdqa %xmm7,%xmm0
- paddd %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
- pxor %xmm6,%xmm4
- addl 4(%esp),%ebx
- xorl %eax,%ebp
- movl %ecx,%esi
- roll $5,%ecx
- movdqa %xmm4,%xmm6
- movdqa %xmm7,48(%esp)
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
- pslld $2,%xmm4
- addl 8(%esp),%eax
- xorl %edi,%esi
- psrld $30,%xmm6
- movl %ebx,%ebp
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- por %xmm6,%xmm4
- addl 12(%esp),%edi
- xorl %edx,%ebp
- movdqa 64(%esp),%xmm6
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- movdqa %xmm4,%xmm7
- addl %ebp,%edi
- addl 16(%esp),%edx
- pxor %xmm1,%xmm5
-.byte 102,15,58,15,251,8
- xorl %ecx,%esi
- movl %edi,%ebp
- roll $5,%edi
- pxor %xmm6,%xmm5
- movdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
- movdqa %xmm0,%xmm1
- paddd %xmm4,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- pxor %xmm7,%xmm5
- addl 20(%esp),%ecx
- xorl %ebx,%ebp
- movl %edx,%esi
- roll $5,%edx
- movdqa %xmm5,%xmm7
- movdqa %xmm0,(%esp)
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
- pslld $2,%xmm5
- addl 24(%esp),%ebx
- xorl %eax,%esi
- psrld $30,%xmm7
- movl %ecx,%ebp
- roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- por %xmm7,%xmm5
- addl 28(%esp),%eax
- xorl %edi,%ebp
- movdqa 80(%esp),%xmm7
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- movdqa %xmm5,%xmm0
- addl %ebp,%eax
- movl %ecx,%ebp
- pxor %xmm2,%xmm6
-.byte 102,15,58,15,196,8
- xorl %edx,%ecx
- addl 32(%esp),%edi
- andl %edx,%ebp
- pxor %xmm7,%xmm6
- movdqa %xmm2,80(%esp)
- andl %ecx,%esi
- rorl $7,%ebx
- movdqa %xmm1,%xmm2
- paddd %xmm5,%xmm1
- addl %ebp,%edi
- movl %eax,%ebp
- pxor %xmm0,%xmm6
- roll $5,%eax
- addl %esi,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- movdqa %xmm6,%xmm0
- movdqa %xmm1,16(%esp)
- movl %ebx,%esi
- xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- pslld $2,%xmm6
- andl %ebx,%ebp
- rorl $7,%eax
- psrld $30,%xmm0
- addl %esi,%edx
- movl %edi,%esi
- roll $5,%edi
- addl %ebp,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- por %xmm0,%xmm6
- movl %eax,%ebp
- xorl %ebx,%eax
- movdqa 96(%esp),%xmm0
- addl 40(%esp),%ecx
- andl %ebx,%ebp
- andl %eax,%esi
- rorl $7,%edi
- addl %ebp,%ecx
- movdqa %xmm6,%xmm1
- movl %edx,%ebp
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movl %edi,%esi
- xorl %eax,%edi
- addl 44(%esp),%ebx
- andl %eax,%esi
- andl %edi,%ebp
- rorl $7,%edx
- addl %esi,%ebx
- movl %ecx,%esi
- roll $5,%ecx
- addl %ebp,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- movl %edx,%ebp
- pxor %xmm3,%xmm7
-.byte 102,15,58,15,205,8
- xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
- pxor %xmm0,%xmm7
- movdqa %xmm3,96(%esp)
- andl %edx,%esi
- rorl $7,%ecx
- movdqa 144(%esp),%xmm3
- paddd %xmm6,%xmm2
- addl %ebp,%eax
- movl %ebx,%ebp
- pxor %xmm1,%xmm7
- roll $5,%ebx
- addl %esi,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- movdqa %xmm7,%xmm1
- movdqa %xmm2,32(%esp)
- movl %ecx,%esi
- xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- pslld $2,%xmm7
- andl %ecx,%ebp
- rorl $7,%ebx
- psrld $30,%xmm1
- addl %esi,%edi
- movl %eax,%esi
- roll $5,%eax
- addl %ebp,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- por %xmm1,%xmm7
- movl %ebx,%ebp
- xorl %ecx,%ebx
- movdqa 64(%esp),%xmm1
- addl 56(%esp),%edx
- andl %ecx,%ebp
- andl %ebx,%esi
- rorl $7,%eax
- addl %ebp,%edx
- movdqa %xmm7,%xmm2
- movl %edi,%ebp
- roll $5,%edi
- addl %esi,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- movl %eax,%esi
- xorl %ebx,%eax
- addl 60(%esp),%ecx
- andl %ebx,%esi
- andl %eax,%ebp
- rorl $7,%edi
- addl %esi,%ecx
- movl %edx,%esi
- roll $5,%edx
- addl %ebp,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movl %edi,%ebp
- pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
- xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
- pxor %xmm1,%xmm0
- movdqa %xmm4,64(%esp)
- andl %edi,%esi
- rorl $7,%edx
- movdqa %xmm3,%xmm4
- paddd %xmm7,%xmm3
- addl %ebp,%ebx
- movl %ecx,%ebp
- pxor %xmm2,%xmm0
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- movdqa %xmm0,%xmm2
- movdqa %xmm3,48(%esp)
- movl %edx,%esi
- xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- pslld $2,%xmm0
- andl %edx,%ebp
- rorl $7,%ecx
- psrld $30,%xmm2
- addl %esi,%eax
- movl %ebx,%esi
- roll $5,%ebx
- addl %ebp,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- por %xmm2,%xmm0
- movl %ecx,%ebp
- xorl %edx,%ecx
- movdqa 80(%esp),%xmm2
- addl 8(%esp),%edi
- andl %edx,%ebp
- andl %ecx,%esi
- rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm0,%xmm3
- movl %eax,%ebp
- roll $5,%eax
- addl %esi,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- movl %ebx,%esi
- xorl %ecx,%ebx
- addl 12(%esp),%edx
- andl %ecx,%esi
- andl %ebx,%ebp
- rorl $7,%eax
- addl %esi,%edx
- movl %edi,%esi
- roll $5,%edi
- addl %ebp,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- movl %eax,%ebp
- pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
- xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
- pxor %xmm2,%xmm1
- movdqa %xmm5,80(%esp)
- andl %eax,%esi
- rorl $7,%edi
- movdqa %xmm4,%xmm5
- paddd %xmm0,%xmm4
- addl %ebp,%ecx
- movl %edx,%ebp
- pxor %xmm3,%xmm1
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movdqa %xmm1,%xmm3
- movdqa %xmm4,(%esp)
- movl %edi,%esi
- xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- pslld $2,%xmm1
- andl %edi,%ebp
- rorl $7,%edx
- psrld $30,%xmm3
- addl %esi,%ebx
- movl %ecx,%esi
- roll $5,%ecx
- addl %ebp,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- por %xmm3,%xmm1
- movl %edx,%ebp
- xorl %edi,%edx
- movdqa 96(%esp),%xmm3
- addl 24(%esp),%eax
- andl %edi,%ebp
- andl %edx,%esi
- rorl $7,%ecx
- addl %ebp,%eax
- movdqa %xmm1,%xmm4
- movl %ebx,%ebp
- roll $5,%ebx
- addl %esi,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
- addl 28(%esp),%edi
- andl %edx,%esi
- andl %ecx,%ebp
- rorl $7,%ebx
- addl %esi,%edi
- movl %eax,%esi
- roll $5,%eax
- addl %ebp,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- movl %ebx,%ebp
- pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
- xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
- pxor %xmm3,%xmm2
- movdqa %xmm6,96(%esp)
- andl %ebx,%esi
- rorl $7,%eax
- movdqa %xmm5,%xmm6
- paddd %xmm1,%xmm5
- addl %ebp,%edx
- movl %edi,%ebp
- pxor %xmm4,%xmm2
- roll $5,%edi
- addl %esi,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- movdqa %xmm2,%xmm4
- movdqa %xmm5,16(%esp)
- movl %eax,%esi
- xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- pslld $2,%xmm2
- andl %eax,%ebp
- rorl $7,%edi
- psrld $30,%xmm4
- addl %esi,%ecx
- movl %edx,%esi
- roll $5,%edx
- addl %ebp,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- por %xmm4,%xmm2
- movl %edi,%ebp
- xorl %eax,%edi
- movdqa 64(%esp),%xmm4
- addl 40(%esp),%ebx
- andl %eax,%ebp
- andl %edi,%esi
- rorl $7,%edx
- addl %ebp,%ebx
- movdqa %xmm2,%xmm5
- movl %ecx,%ebp
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- movl %edx,%esi
- xorl %edi,%edx
- addl 44(%esp),%eax
- andl %edi,%esi
- andl %edx,%ebp
- rorl $7,%ecx
- addl %esi,%eax
- movl %ebx,%esi
- roll $5,%ebx
- addl %ebp,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- addl 48(%esp),%edi
- pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %edx,%esi
- movl %eax,%ebp
- roll $5,%eax
- pxor %xmm4,%xmm3
- movdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
- movdqa %xmm6,%xmm7
- paddd %xmm2,%xmm6
- rorl $7,%ebx
- addl %esi,%edi
- pxor %xmm5,%xmm3
- addl 52(%esp),%edx
- xorl %ecx,%ebp
- movl %edi,%esi
- roll $5,%edi
- movdqa %xmm3,%xmm5
- movdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
- pslld $2,%xmm3
- addl 56(%esp),%ecx
- xorl %ebx,%esi
- psrld $30,%xmm5
- movl %edx,%ebp
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- por %xmm5,%xmm3
- addl 60(%esp),%ebx
- xorl %eax,%ebp
- movl %ecx,%esi
- roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
- addl (%esp),%eax
- paddd %xmm3,%xmm7
- xorl %edi,%esi
- movl %ebx,%ebp
- roll $5,%ebx
- xorl %edx,%esi
- movdqa %xmm7,48(%esp)
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 4(%esp),%edi
- xorl %edx,%ebp
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
- addl 8(%esp),%edx
- xorl %ecx,%esi
- movl %edi,%ebp
- roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- addl 12(%esp),%ecx
- xorl %ebx,%ebp
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
- movl 196(%esp),%ebp
- cmpl 200(%esp),%ebp
- je .L005done
- movdqa 160(%esp),%xmm7
- movdqa 176(%esp),%xmm6
- movdqu (%ebp),%xmm0
- movdqu 16(%ebp),%xmm1
- movdqu 32(%ebp),%xmm2
- movdqu 48(%ebp),%xmm3
- addl $64,%ebp
-.byte 102,15,56,0,198
- movl %ebp,196(%esp)
- movdqa %xmm7,96(%esp)
- addl 16(%esp),%ebx
- xorl %eax,%esi
-.byte 102,15,56,0,206
- movl %ecx,%ebp
- roll $5,%ecx
- paddd %xmm7,%xmm0
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- movdqa %xmm0,(%esp)
- addl 20(%esp),%eax
- xorl %edi,%ebp
- psubd %xmm7,%xmm0
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- addl 24(%esp),%edi
- xorl %edx,%esi
- movl %eax,%ebp
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
- addl 28(%esp),%edx
- xorl %ecx,%ebp
- movl %edi,%esi
- roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
- addl 32(%esp),%ecx
- xorl %ebx,%esi
-.byte 102,15,56,0,214
- movl %edx,%ebp
- roll $5,%edx
- paddd %xmm7,%xmm1
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- movdqa %xmm1,16(%esp)
- addl 36(%esp),%ebx
- xorl %eax,%ebp
- psubd %xmm7,%xmm1
- movl %ecx,%esi
- roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
- addl 40(%esp),%eax
- xorl %edi,%esi
- movl %ebx,%ebp
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 44(%esp),%edi
- xorl %edx,%ebp
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
- addl 48(%esp),%edx
- xorl %ecx,%esi
-.byte 102,15,56,0,222
- movl %edi,%ebp
- roll $5,%edi
- paddd %xmm7,%xmm2
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- movdqa %xmm2,32(%esp)
- addl 52(%esp),%ecx
- xorl %ebx,%ebp
- psubd %xmm7,%xmm2
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
- addl 56(%esp),%ebx
- xorl %eax,%esi
- movl %ecx,%ebp
- roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 60(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,8(%ebp)
- movl %esi,%ebx
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- movdqa %xmm1,%xmm4
- jmp .L004loop
-.align 16
-.L005done:
- addl 16(%esp),%ebx
- xorl %eax,%esi
- movl %ecx,%ebp
- roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 20(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- addl 24(%esp),%edi
- xorl %edx,%esi
- movl %eax,%ebp
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
- addl 28(%esp),%edx
- xorl %ecx,%ebp
- movl %edi,%esi
- roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
- addl 32(%esp),%ecx
- xorl %ebx,%esi
- movl %edx,%ebp
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- addl 36(%esp),%ebx
- xorl %eax,%ebp
- movl %ecx,%esi
- roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
- addl 40(%esp),%eax
- xorl %edi,%esi
- movl %ebx,%ebp
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 44(%esp),%edi
- xorl %edx,%ebp
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
- addl 48(%esp),%edx
- xorl %ecx,%esi
- movl %edi,%ebp
- roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- addl 52(%esp),%ecx
- xorl %ebx,%ebp
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
- addl 56(%esp),%ebx
- xorl %eax,%esi
- movl %ecx,%ebp
- roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 60(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- movl 204(%esp),%esp
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,8(%ebp)
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
-.align 64
-.LK_XX_XX:
-.long 1518500249,1518500249,1518500249,1518500249
-.long 1859775393,1859775393,1859775393,1859775393
-.long 2400959708,2400959708,2400959708,2400959708
-.long 3395469782,3395469782,3395469782,3395469782
-.long 66051,67438087,134810123,202182159
-.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
-.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
-.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
-.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
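An aside on the dispatch at the top of the deleted sha1-586.S above: the entry point reads the two OPENSSL_ia32cap_P words (CPUID(1) EDX and ECX) and only takes .Lssse3_shortcut when EDX bit 24 (FXSR, the 16777216 test) and ECX bit 9 (SSSE3, the 512 test) are both set; otherwise it falls through to the plain .L001x86 loop. A minimal Perl sketch of that test, with made-up capability words for illustration:

    # Hypothetical CPUID(1) values; the real ones live in OPENSSL_ia32cap_P.
    my ($cap_edx, $cap_ecx) = (0x0383fbff, 0x0000e3bd);
    my $fxsr  = ($cap_edx >> 24) & 1;  # testl $16777216,%eax in the listing
    my $ssse3 = ($cap_ecx >> 9) & 1;   # testl $512,%edx in the listing
    print +($fxsr && $ssse3) ? "take .Lssse3_shortcut\n" : "take .L001x86\n";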
diff --git a/app/openssl/crypto/sha/asm/sha1-586.pl b/app/openssl/crypto/sha/asm/sha1-586.pl
deleted file mode 100644
index 2b119ffa..00000000
--- a/app/openssl/crypto/sha/asm/sha1-586.pl
+++ /dev/null
@@ -1,1229 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# [Re]written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# "[Re]written" was achieved in two major overhauls. In 2004 BODY_*
-# functions were re-implemented to address P4 performance issue [see
-# commentary below], and in 2006 the rest was rewritten in order to
-# gain freedom to liberate licensing terms.
-
-# January, September 2004.
-#
-# It was noted that Intel IA-32 C compiler generates code which
-# performs ~30% *faster* on P4 CPU than original *hand-coded*
-# SHA1 assembler implementation. To address this problem (and
-# prove that humans are still better than machines:-), the
-# original code was overhauled, which resulted in following
-# performance changes:
-#
-# compared with original compared with Intel cc
-# assembler impl. generated code
-# Pentium -16% +48%
-# PIII/AMD +8% +16%
-# P4 +85%(!) +45%
-#
-# As you can see Pentium came out as the loser:-( Yet I reckoned that
-# the improvement on P4 outweighs the loss and incorporated this
-# re-tuned code into 0.9.7 and later.
-# ----------------------------------------------------------------
-# <appro@fy.chalmers.se>
-
-# August 2009.
-#
-# George Spelvin tipped us off that F_40_59(b,c,d) can be rewritten as
-# '(c&d) + (b&(c^d))', which allows one to accumulate partial results
-# and lighten "pressure" on scratch registers. This resulted in
-# >12% performance improvement on contemporary AMD cores (with no
-# degradation on other CPUs:-). Also, the code was revised to maximize
-# "distance" between instructions producing input to 'lea' instruction
-# and the 'lea' instruction itself, which is essential for Intel Atom
-# core and resulted in ~15% improvement.
-
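As a quick sanity check on the identity above (my own snippet, not part of the patch): the two addends are bitwise disjoint, since c&d can only be non-zero where c^d is zero, so '+' can stand in for '|' and the rewrite agrees with the textbook majority function on every input:

    for my $b (0, 1) { for my $c (0, 1) { for my $d (0, 1) {
        my $maj = ($b & $c) | ($c & $d) | ($b & $d);  # canonical F_40_59
        my $alt = ($c & $d) + ($b & ($c ^ $d));       # Spelvin's rewrite
        die "mismatch at b=$b c=$c d=$d" if $maj != $alt;
    } } }
    print "F_40_59 identity holds for all bit patterns\n";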
-# October 2010.
-#
-# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it
-# is to offload message schedule denoted by Wt in NIST specification,
-# or Xupdate in OpenSSL source, to SIMD unit. The idea is not novel,
-# and in SSE2 context was first explored by Dean Gaudet in 2004, see
-# http://arctic.org/~dean/crypto/sha1.html. Since then several things
-# have changed that made it interesting again:
-#
-# a) XMM units became faster and wider;
-# b) instruction set became more versatile;
-# c) an important observation was made by Max Locktykhin, which made
-# it possible to reduce amount of instructions required to perform
-# the operation in question, for further details see
-# http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/.
-
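For orientation, the "Xupdate" recurrence being offloaded here is the standard SHA-1 message expansion; a scalar Perl rendering (assumed naming, mine) of what the SSSE3 path computes four lanes at a time:

    sub rotl32 { my ($x, $n) = @_; (($x << $n) | ($x >> (32 - $n))) & 0xffffffff }
    my @W = map { $_ * 0x01010101 } 0 .. 15;  # placeholder 16-word input block
    for my $t (16 .. 79) {
        # W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])
        $W[$t] = rotl32($W[$t-3] ^ $W[$t-8] ^ $W[$t-14] ^ $W[$t-16], 1);
    }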
-# April 2011.
-#
-# Add AVX code path, probably most controversial... The thing is that
-# switch to AVX alone improves performance by as little as 4% in
-# comparison to SSSE3 code path. But below result doesn't look like
-# 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as
-# pair of µ-ops, and it's the additional µ-ops, two per round, that
-# make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded
-# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
-# equivalent 'sh[rl]d' that is responsible for the impressive 5.1
-# cycles per processed byte. But 'sh[rl]d' is not something that used
-# to be fast, nor does it appear to be fast in upcoming Bulldozer
-# [according to its optimization manual]. Which is why AVX code path
-# is guarded by *both* AVX and synthetic bit denoting Intel CPUs.
-# One can argue that it's unfair to AMD, but without 'sh[rl]d' it
-# makes no sense to keep the AVX code path. If somebody feels that
-# strongly, it's probably more appropriate to discuss possibility of
-# using vector rotate XOP on AMD...
-
-######################################################################
-# Current performance is summarized in following table. Numbers are
-# CPU clock cycles spent to process single byte (less is better).
-#
-# x86 SSSE3 AVX
-# Pentium 15.7 -
-# PIII 11.5 -
-# P4 10.6 -
-# AMD K8 7.1 -
-# Core2 7.3 6.1/+20% -
-# Atom 12.5 9.5(*)/+32% -
-# Westmere 7.3 5.6/+30% -
-# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+70%
-#
-# (*) Loop is 1056 instructions long and expected result is ~8.25.
-# It remains a mystery [to me] why ILP is limited to 1.7.
-#
-# (**) As per above comment, the result is for AVX *plus* sh[rl]d.
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");
-
-$xmm=$ymm=0;
-for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); }
-
-$ymm=1 if ($xmm &&
- `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
- =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
- $1>=2.19); # first version supporting AVX
-
-$ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32n" &&
- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
- $1>=2.03); # first version supporting AVX
-
-&external_label("OPENSSL_ia32cap_P") if ($xmm);
-
-
-$A="eax";
-$B="ebx";
-$C="ecx";
-$D="edx";
-$E="edi";
-$T="esi";
-$tmp1="ebp";
-
-@V=($A,$B,$C,$D,$E,$T);
-
-$alt=0; # 1 denotes alternative IALU implementation, which performs
- # 8% *worse* on P4, same on Westmere and Atom, 2% better on
- # Sandy Bridge...
-
-sub BODY_00_15
- {
- local($n,$a,$b,$c,$d,$e,$f)=@_;
-
- &comment("00_15 $n");
-
- &mov($f,$c); # f to hold F_00_19(b,c,d)
- if ($n==0) { &mov($tmp1,$a); }
- else { &mov($a,$tmp1); }
- &rotl($tmp1,5); # tmp1=ROTATE(a,5)
- &xor($f,$d);
- &add($tmp1,$e); # tmp1+=e;
- &mov($e,&swtmp($n%16)); # e becomes volatile and is loaded
- # with xi, also note that e becomes
- # f in next round...
- &and($f,$b);
- &rotr($b,2); # b=ROTATE(b,30)
- &xor($f,$d); # f holds F_00_19(b,c,d)
- &lea($tmp1,&DWP(0x5a827999,$tmp1,$e)); # tmp1+=K_00_19+xi
-
- if ($n==15) { &mov($e,&swtmp(($n+1)%16));# pre-fetch f for next round
- &add($f,$tmp1); } # f+=tmp1
- else { &add($tmp1,$f); } # f becomes a in next round
- &mov($tmp1,$a) if ($alt && $n==15);
- }
-
-sub BODY_16_19
- {
- local($n,$a,$b,$c,$d,$e,$f)=@_;
-
- &comment("16_19 $n");
-
-if ($alt) {
- &xor($c,$d);
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &and($tmp1,$c); # tmp1 to hold F_00_19(b,c,d), b&=c^d
- &xor($f,&swtmp(($n+8)%16));
- &xor($tmp1,$d); # tmp1=F_00_19(b,c,d)
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &add($e,$tmp1); # e+=F_00_19(b,c,d)
- &xor($c,$d); # restore $c
- &mov($tmp1,$a); # b in next round
- &rotr($b,$n==16?2:7); # b=ROTATE(b,30)
- &mov(&swtmp($n%16),$f); # xi=f
- &rotl($a,5); # ROTATE(a,5)
- &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e
- &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round
- &add($f,$a); # f+=ROTATE(a,5)
-} else {
- &mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d)
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &xor($tmp1,$d);
- &xor($f,&swtmp(($n+8)%16));
- &and($tmp1,$b);
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &xor($tmp1,$d); # tmp1=F_00_19(b,c,d)
- &add($e,$tmp1); # e+=F_00_19(b,c,d)
- &mov($tmp1,$a);
- &rotr($b,2); # b=ROTATE(b,30)
- &mov(&swtmp($n%16),$f); # xi=f
- &rotl($tmp1,5); # ROTATE(a,5)
- &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e
- &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round
- &add($f,$tmp1); # f+=ROTATE(a,5)
-}
- }
-
-sub BODY_20_39
- {
- local($n,$a,$b,$c,$d,$e,$f)=@_;
- local $K=($n<40)?0x6ed9eba1:0xca62c1d6;
-
- &comment("20_39 $n");
-
-if ($alt) {
- &xor($tmp1,$c); # tmp1 to hold F_20_39(b,c,d), b^=c
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d)
- &xor($f,&swtmp(($n+8)%16));
- &add($e,$tmp1); # e+=F_20_39(b,c,d)
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &mov($tmp1,$a); # b in next round
- &rotr($b,7); # b=ROTATE(b,30)
- &mov(&swtmp($n%16),$f) if($n<77);# xi=f
- &rotl($a,5); # ROTATE(a,5)
- &xor($b,$c) if($n==39);# warm up for BODY_40_59
- &and($tmp1,$b) if($n==39);
- &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY
- &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round
- &add($f,$a); # f+=ROTATE(a,5)
- &rotr($a,5) if ($n==79);
-} else {
- &mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d)
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &xor($tmp1,$c);
- &xor($f,&swtmp(($n+8)%16));
- &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d)
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &add($e,$tmp1); # e+=F_20_39(b,c,d)
- &rotr($b,2); # b=ROTATE(b,30)
- &mov($tmp1,$a);
- &rotl($tmp1,5); # ROTATE(a,5)
- &mov(&swtmp($n%16),$f) if($n<77);# xi=f
- &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY
- &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round
- &add($f,$tmp1); # f+=ROTATE(a,5)
-}
- }
-
-sub BODY_40_59
- {
- local($n,$a,$b,$c,$d,$e,$f)=@_;
-
- &comment("40_59 $n");
-
-if ($alt) {
- &add($e,$tmp1); # e+=b&(c^d)
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &mov($tmp1,$d);
- &xor($f,&swtmp(($n+8)%16));
- &xor($c,$d); # restore $c
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &and($tmp1,$c);
- &rotr($b,7); # b=ROTATE(b,30)
- &add($e,$tmp1); # e+=c&d
- &mov($tmp1,$a); # b in next round
- &mov(&swtmp($n%16),$f); # xi=f
- &rotl($a,5); # ROTATE(a,5)
- &xor($b,$c) if ($n<59);
- &and($tmp1,$b) if ($n<59);# tmp1 to hold F_40_59(b,c,d)
- &lea($f,&DWP(0x8f1bbcdc,$f,$e));# f+=K_40_59+e+(b&(c^d))
- &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round
- &add($f,$a); # f+=ROTATE(a,5)
-} else {
- &mov($tmp1,$c); # tmp1 to hold F_40_59(b,c,d)
- &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd)
- &xor($tmp1,$d);
- &xor($f,&swtmp(($n+8)%16));
- &and($tmp1,$b);
- &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd
- &rotl($f,1); # f=ROTATE(f,1)
- &add($tmp1,$e); # b&(c^d)+=e
- &rotr($b,2); # b=ROTATE(b,30)
- &mov($e,$a); # e becomes volatile
- &rotl($e,5); # ROTATE(a,5)
- &mov(&swtmp($n%16),$f); # xi=f
- &lea($f,&DWP(0x8f1bbcdc,$f,$tmp1));# f+=K_40_59+e+(b&(c^d))
- &mov($tmp1,$c);
- &add($f,$e); # f+=ROTATE(a,5)
- &and($tmp1,$d);
- &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round
- &add($f,$tmp1); # f+=c&d
-}
- }
-
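-# For reference, the boolean selection functions that the round bodies
-# above implement; a plain-Perl illustration under hypothetical names,
-# not code used by the generator:
-sub F_00_19 { my($b,$c,$d)=@_; return ($b & ($c ^ $d)) ^ $d; }		# Ch
-sub F_20_39 { my($b,$c,$d)=@_; return $b ^ $c ^ $d; }			# Parity
-sub F_40_59 { my($b,$c,$d)=@_; return ($b & ($c ^ $d)) ^ ($c & $d); }	# Maj
-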
-&function_begin("sha1_block_data_order");
-if ($xmm) {
- &static_label("ssse3_shortcut");
- &static_label("avx_shortcut") if ($ymm);
- &static_label("K_XX_XX");
-
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-
- &mov ($A,&DWP(0,$T));
- &mov ($D,&DWP(4,$T));
- &test ($D,1<<9); # check SSSE3 bit
- &jz (&label("x86"));
- &test ($A,1<<24); # check FXSR bit
- &jz (&label("x86"));
- if ($ymm) {
- &and ($D,1<<28); # mask AVX bit
- &and ($A,1<<30); # mask "Intel CPU" bit
- &or ($A,$D);
- &cmp ($A,1<<28|1<<30);
- &je (&label("avx_shortcut"));
- }
- &jmp (&label("ssse3_shortcut"));
- &set_label("x86",16);
-}
- &mov($tmp1,&wparam(0)); # SHA_CTX *c
- &mov($T,&wparam(1)); # const void *input
- &mov($A,&wparam(2)); # size_t num
- &stack_push(16+3); # allocate X[16]
- &shl($A,6);
- &add($A,$T);
- &mov(&wparam(2),$A); # pointer beyond the end of input
- &mov($E,&DWP(16,$tmp1));# pre-load E
- &jmp(&label("loop"));
-
-&set_label("loop",16);
-
- # copy input chunk to X, but reversing byte order!
- for ($i=0; $i<16; $i+=4)
- {
- &mov($A,&DWP(4*($i+0),$T));
- &mov($B,&DWP(4*($i+1),$T));
- &mov($C,&DWP(4*($i+2),$T));
- &mov($D,&DWP(4*($i+3),$T));
- &bswap($A);
- &bswap($B);
- &bswap($C);
- &bswap($D);
- &mov(&swtmp($i+0),$A);
- &mov(&swtmp($i+1),$B);
- &mov(&swtmp($i+2),$C);
- &mov(&swtmp($i+3),$D);
- }
- &mov(&wparam(1),$T); # redundant in 1st spin
-
- &mov($A,&DWP(0,$tmp1)); # load SHA_CTX
- &mov($B,&DWP(4,$tmp1));
- &mov($C,&DWP(8,$tmp1));
- &mov($D,&DWP(12,$tmp1));
- # E is pre-loaded
-
- for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
- for(;$i<20;$i++) { &BODY_16_19($i,@V); unshift(@V,pop(@V)); }
- for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
- for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
- for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-
- (($V[5] eq $D) and ($V[0] eq $E)) or die; # double-check
-
- &mov($tmp1,&wparam(0)); # re-load SHA_CTX*
- &mov($D,&wparam(1)); # D is last "T" and is discarded
-
- &add($E,&DWP(0,$tmp1)); # E is last "A"...
- &add($T,&DWP(4,$tmp1));
- &add($A,&DWP(8,$tmp1));
- &add($B,&DWP(12,$tmp1));
- &add($C,&DWP(16,$tmp1));
-
- &mov(&DWP(0,$tmp1),$E); # update SHA_CTX
- &add($D,64); # advance input pointer
- &mov(&DWP(4,$tmp1),$T);
- &cmp($D,&wparam(2)); # have we reached the end yet?
- &mov(&DWP(8,$tmp1),$A);
- &mov($E,$C); # C is last "E" which needs to be "pre-loaded"
- &mov(&DWP(12,$tmp1),$B);
- &mov($T,$D); # input pointer
- &mov(&DWP(16,$tmp1),$C);
- &jb(&label("loop"));
-
- &stack_pop(16+3);
-&function_end("sha1_block_data_order");
-
-if ($xmm) {
-######################################################################
-# The SSSE3 implementation.
-#
-# %xmm[0-7] are used as a ring @X[] buffer containing quadruples of the
-# last 32 elements of the message schedule or Xupdate outputs. The first
-# 4 quadruples are simply byte-swapped input; the next 4 are calculated
-# according to the method originally suggested by Dean Gaudet (modulo
-# being implemented in SSSE3). Once 8 quadruples or 32 elements are
-# collected, it switches to the routine proposed by Max Locktyukhin.
-#
-# Calculations inevitably require temporary registers, and there are
-# no %xmm registers left to spare. For this reason part of the ring
-# buffer, X[2..4] to be specific, is offloaded to a 3-quadruple ring
-# buffer on the stack. Keep in mind that X[2] aliases X[-6], X[3]
-# aliases X[-5], and X[4] aliases X[-4]...
-#
-# Another notable optimization is aggressive stack frame compression,
-# aiming to minimize the number of 9-byte instructions...
-#
-# Yet another notable optimization is the "jumping" $B variable, meaning
-# that no register is permanently allocated for the $B value. This
-# made it possible to eliminate one instruction from body_20_39...
-#
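-# The negative indices used below rely on two's-complement masking:
-# -4&7==4, -1&7==7, etc., so @X[-4&7] simply wraps around the 8-entry
-# @X ring. A one-line sanity check (illustrative only, no effect on the
-# generated code):
-my @ring_demo = map { $_ & 7 } (-6..-1);	# (2,3,4,5,6,7)
-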
-my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded
-my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4
-my @V=($A,$B,$C,$D,$E);
-my $j=0; # hash round
-my @T=($T,$tmp1);
-my $inp;
-
-my $_rol=sub { &rol(@_) };
-my $_ror=sub { &ror(@_) };
-
-&function_begin("_sha1_block_data_order_ssse3");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-&set_label("ssse3_shortcut");
-
- &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
- &movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39
- &movdqa (@X[5],&QWP(32,$tmp1)); # K_40_59
- &movdqa (@X[6],&QWP(48,$tmp1)); # K_60_79
- &movdqa (@X[2],&QWP(64,$tmp1)); # pbswap mask
-
- &mov ($E,&wparam(0)); # load argument block
- &mov ($inp=@T[1],&wparam(1));
- &mov ($D,&wparam(2));
- &mov (@T[0],"esp");
-
- # stack frame layout
- #
- # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area
- # X[4]+K X[5]+K X[6]+K X[7]+K
- # X[8]+K X[9]+K X[10]+K X[11]+K
- # X[12]+K X[13]+K X[14]+K X[15]+K
- #
- # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area
- # X[4] X[5] X[6] X[7]
- # X[8] X[9] X[10] X[11] # even borrowed for K_00_19
- #
- # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants
- # K_40_59 K_40_59 K_40_59 K_40_59
- # K_60_79 K_60_79 K_60_79 K_60_79
- # K_00_19 K_00_19 K_00_19 K_00_19
- # pbswap mask
- #
- # +192 ctx # argument block
- # +196 inp
- # +200 end
- # +204 esp
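-	#
-	# (208 bytes cover the 192 bytes mapped above plus this 16-byte
-	# save area; the and(-64) below rounds %esp down to a 64-byte
-	# boundary, which also provides the 16-byte alignment movdqa needs)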
- &sub ("esp",208);
- &and ("esp",-64);
-
- &movdqa (&QWP(112+0,"esp"),@X[4]); # copy constants
- &movdqa (&QWP(112+16,"esp"),@X[5]);
- &movdqa (&QWP(112+32,"esp"),@X[6]);
- &shl ($D,6); # len*64
- &movdqa (&QWP(112+48,"esp"),@X[3]);
- &add ($D,$inp); # end of input
- &movdqa (&QWP(112+64,"esp"),@X[2]);
- &add ($inp,64);
- &mov (&DWP(192+0,"esp"),$E); # save argument block
- &mov (&DWP(192+4,"esp"),$inp);
- &mov (&DWP(192+8,"esp"),$D);
- &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp
-
- &mov ($A,&DWP(0,$E)); # load context
- &mov ($B,&DWP(4,$E));
- &mov ($C,&DWP(8,$E));
- &mov ($D,&DWP(12,$E));
- &mov ($E,&DWP(16,$E));
- &mov (@T[0],$B); # magic seed
-
- &movdqu (@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3]
- &movdqu (@X[-3&7],&QWP(-48,$inp));
- &movdqu (@X[-2&7],&QWP(-32,$inp));
- &movdqu (@X[-1&7],&QWP(-16,$inp));
- &pshufb (@X[-4&7],@X[2]); # byte swap
- &pshufb (@X[-3&7],@X[2]);
- &pshufb (@X[-2&7],@X[2]);
- &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot
- &pshufb (@X[-1&7],@X[2]);
- &paddd (@X[-4&7],@X[3]); # add K_00_19
- &paddd (@X[-3&7],@X[3]);
- &paddd (@X[-2&7],@X[3]);
- &movdqa (&QWP(0,"esp"),@X[-4&7]); # X[]+K xfer to IALU
- &psubd (@X[-4&7],@X[3]); # restore X[]
- &movdqa (&QWP(0+16,"esp"),@X[-3&7]);
- &psubd (@X[-3&7],@X[3]);
- &movdqa (&QWP(0+32,"esp"),@X[-2&7]);
- &psubd (@X[-2&7],@X[3]);
- &movdqa (@X[0],@X[-3&7]);
- &jmp (&label("loop"));
-
-######################################################################
-# The SSE instruction sequence is first broken into groups of independent
-# instructions, independent with respect to their inputs and the shifter
-# (not all architectures have more than one). Then IALU instructions
-# are "knitted in" between the SSE groups. A distance corresponding to an
-# SSE latency of 2 is maintained, in the hope that it suits the upcoming
-# AMD Bulldozer [which allegedly also implements SSSE3]...
-#
-# Temporary register usage. X[2] is volatile at the entry and at the
-# end is restored from the backtrace ring buffer. X[3] is expected to
-# contain the current K_XX_XX constant and is used to calculate X[-1]+K
-# from the previous round; it becomes volatile the moment the value is
-# saved to the stack for transfer to IALU. X[4] becomes volatile whenever
-# X[-4] is accumulated and offloaded to the backtrace ring buffer; at the
-# end it is loaded with the next K_XX_XX [which becomes X[3] in the next
-# round]...
-#
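-# The knitting is the eval(shift(@insns)) calls between SIMD steps below:
-# each body_xx_xx returns a list of eval-able strings, one IALU
-# instruction per element, and the Xupdate subs drain that list a few
-# entries at a time. A toy model of the pattern (hypothetical names, not
-# used by the generator):
-#
-#	my @insns = (&$body,&$body);		# collect IALU instructions
-#	&simd_op_one();				# SIMD step
-#	eval(shift(@insns));			# knit in one IALU instruction
-#	&simd_op_two();				# next SIMD step
-#	foreach (@insns) { eval; }		# flush the remainder
-#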
-sub Xupdate_ssse3_16_31()		# recall that $Xi starts with 4
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]"
- &movdqa (@X[2],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
-
- &paddd (@X[3],@X[-1&7]);
- &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
- &psrldq (@X[2],4); # "X[-3]", 3 dwords
- eval(shift(@insns));
- eval(shift(@insns));
- &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@X[2],@X[-2&7]); # "X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- &movdqa (@X[4],@X[0]);
- &movdqa (@X[2],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pslldq (@X[4],12); # "X[0]"<<96, extract one dword
- &paddd (@X[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &psrld (@X[2],31);
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@X[3],@X[4]);
- eval(shift(@insns));
- eval(shift(@insns));
-
- &psrld (@X[4],30);
- &por (@X[0],@X[2]); # "X[0]"<<<=1
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pslld (@X[3],2);
- &pxor (@X[0],@X[4]);
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@X[0],@X[3]); # "X[0]"^=("X[0]"<<96)<<<2
- &movdqa (@X[1],@X[-2&7]) if ($Xi<7);
- eval(shift(@insns));
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xupdate_ssse3_32_79()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
- my ($a,$b,$c,$d,$e);
-
- &movdqa (@X[2],@X[-1&7]) if ($Xi==8);
- eval(shift(@insns)); # body_20_39
- &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
- &palignr(@X[2],@X[-2&7],8); # compose "X[-6]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
- &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
- if ($Xi%5) {
- &movdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX...
- } else { # ... or load next one
- &movdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp"));
- }
- &paddd (@X[3],@X[-1&7]);
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &pxor (@X[0],@X[2]); # "X[0]"^="X[-6]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &movdqa (@X[2],@X[0]);
- &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &pslld (@X[0],2);
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &psrld (@X[2],30);
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &por (@X[0],@X[2]); # "X[0]"<<<=2
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- &movdqa (@X[3],@X[0]) if ($Xi<19);
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xuplast_ssse3_80()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- &paddd (@X[3],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU
-
- foreach (@insns) { eval; } # remaining instructions
-
- &mov ($inp=@T[1],&DWP(192+4,"esp"));
- &cmp ($inp,&DWP(192+8,"esp"));
- &je (&label("done"));
-
- &movdqa (@X[3],&QWP(112+48,"esp")); # K_00_19
- &movdqa (@X[2],&QWP(112+64,"esp")); # pbswap mask
- &movdqu (@X[-4&7],&QWP(0,$inp)); # load input
- &movdqu (@X[-3&7],&QWP(16,$inp));
- &movdqu (@X[-2&7],&QWP(32,$inp));
- &movdqu (@X[-1&7],&QWP(48,$inp));
- &add ($inp,64);
- &pshufb (@X[-4&7],@X[2]); # byte swap
- &mov (&DWP(192+4,"esp"),$inp);
- &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot
-
- $Xi=0;
-}
-
-sub Xloop_ssse3()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &pshufb (@X[($Xi-3)&7],@X[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &paddd (@X[($Xi-4)&7],@X[3]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (&QWP(0+16*$Xi,"esp"),@X[($Xi-4)&7]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- &psubd (@X[($Xi-4)&7],@X[3]);
-
- foreach (@insns) { eval; }
- $Xi++;
-}
-
-sub Xtail_ssse3()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- foreach (@insns) { eval; }
-}
-
-sub body_00_19 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer
- '&xor ($c,$d);',
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&and (@T[0],$c);', # ($b&($c^$d))
- '&xor ($c,$d);', # restore $c
- '&xor (@T[0],$d);',
- '&add ($e,$a);',
- '&$_ror ($b,$j?7:2);', # $b>>>2
- '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
-
-sub body_20_39 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer
- '&xor (@T[0],$d);', # ($b^$d)
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&xor (@T[0],$c);', # ($b^$d^$c)
- '&add ($e,$a);',
- '&$_ror ($b,7);', # $b>>>2
- '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
-
-sub body_40_59 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&mov (@T[1],$c);',
- '&xor ($c,$d);',
- '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer
- '&and (@T[1],$d);',
- '&and (@T[0],$c);', # ($b&($c^$d))
- '&$_ror ($b,7);', # $b>>>2
- '&add ($e,@T[1]);',
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&add ($e,@T[0]);',
- '&xor ($c,$d);', # restore $c
- '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
-
-&set_label("loop",16);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_32_79(\&body_00_19);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xuplast_ssse3_80(\&body_20_39); # can jump to "done"
-
- $saved_j=$j; @saved_V=@V;
-
- &Xloop_ssse3(\&body_20_39);
- &Xloop_ssse3(\&body_20_39);
- &Xloop_ssse3(\&body_20_39);
-
- &mov (@T[1],&DWP(192,"esp")); # update context
- &add ($A,&DWP(0,@T[1]));
- &add (@T[0],&DWP(4,@T[1])); # $b
- &add ($C,&DWP(8,@T[1]));
- &mov (&DWP(0,@T[1]),$A);
- &add ($D,&DWP(12,@T[1]));
- &mov (&DWP(4,@T[1]),@T[0]);
- &add ($E,&DWP(16,@T[1]));
- &mov (&DWP(8,@T[1]),$C);
- &mov ($B,@T[0]);
- &mov (&DWP(12,@T[1]),$D);
- &mov (&DWP(16,@T[1]),$E);
- &movdqa (@X[0],@X[-3&7]);
-
- &jmp (&label("loop"));
-
-&set_label("done",16); $j=$saved_j; @V=@saved_V;
-
- &Xtail_ssse3(\&body_20_39);
- &Xtail_ssse3(\&body_20_39);
- &Xtail_ssse3(\&body_20_39);
-
- &mov (@T[1],&DWP(192,"esp")); # update context
- &add ($A,&DWP(0,@T[1]));
- &mov ("esp",&DWP(192+12,"esp")); # restore %esp
- &add (@T[0],&DWP(4,@T[1])); # $b
- &add ($C,&DWP(8,@T[1]));
- &mov (&DWP(0,@T[1]),$A);
- &add ($D,&DWP(12,@T[1]));
- &mov (&DWP(4,@T[1]),@T[0]);
- &add ($E,&DWP(16,@T[1]));
- &mov (&DWP(8,@T[1]),$C);
- &mov (&DWP(12,@T[1]),$D);
- &mov (&DWP(16,@T[1]),$E);
-
-&function_end("_sha1_block_data_order_ssse3");
-
-if ($ymm) {
-my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded
-my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4
-my @V=($A,$B,$C,$D,$E);
-my $j=0; # hash round
-my @T=($T,$tmp1);
-my $inp;
-
-my $_rol=sub { &shld(@_[0],@_) };
-my $_ror=sub { &shrd(@_[0],@_) };
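-# (with identical destination and source, 'shld %reg,%reg,$n' behaves as
-# 'rol %reg,$n', and 'shrd' as 'ror': the single-µ-op substitute on
-# Sandy Bridge discussed in the header notes)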
-
-&function_begin("_sha1_block_data_order_avx");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-&set_label("avx_shortcut");
- &vzeroall();
-
- &vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19
- &vmovdqa(@X[4],&QWP(16,$tmp1)); # K_20_39
- &vmovdqa(@X[5],&QWP(32,$tmp1)); # K_40_59
- &vmovdqa(@X[6],&QWP(48,$tmp1)); # K_60_79
- &vmovdqa(@X[2],&QWP(64,$tmp1)); # pbswap mask
-
- &mov ($E,&wparam(0)); # load argument block
- &mov ($inp=@T[1],&wparam(1));
- &mov ($D,&wparam(2));
- &mov (@T[0],"esp");
-
- # stack frame layout
- #
- # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area
- # X[4]+K X[5]+K X[6]+K X[7]+K
- # X[8]+K X[9]+K X[10]+K X[11]+K
- # X[12]+K X[13]+K X[14]+K X[15]+K
- #
- # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area
- # X[4] X[5] X[6] X[7]
- # X[8] X[9] X[10] X[11] # even borrowed for K_00_19
- #
- # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants
- # K_40_59 K_40_59 K_40_59 K_40_59
- # K_60_79 K_60_79 K_60_79 K_60_79
- # K_00_19 K_00_19 K_00_19 K_00_19
- # pbswap mask
- #
- # +192 ctx # argument block
- # +196 inp
- # +200 end
- # +204 esp
- &sub ("esp",208);
- &and ("esp",-64);
-
- &vmovdqa(&QWP(112+0,"esp"),@X[4]); # copy constants
- &vmovdqa(&QWP(112+16,"esp"),@X[5]);
- &vmovdqa(&QWP(112+32,"esp"),@X[6]);
- &shl ($D,6); # len*64
- &vmovdqa(&QWP(112+48,"esp"),@X[3]);
- &add ($D,$inp); # end of input
- &vmovdqa(&QWP(112+64,"esp"),@X[2]);
- &add ($inp,64);
- &mov (&DWP(192+0,"esp"),$E); # save argument block
- &mov (&DWP(192+4,"esp"),$inp);
- &mov (&DWP(192+8,"esp"),$D);
- &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp
-
- &mov ($A,&DWP(0,$E)); # load context
- &mov ($B,&DWP(4,$E));
- &mov ($C,&DWP(8,$E));
- &mov ($D,&DWP(12,$E));
- &mov ($E,&DWP(16,$E));
- &mov (@T[0],$B); # magic seed
-
- &vmovdqu(@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3]
- &vmovdqu(@X[-3&7],&QWP(-48,$inp));
- &vmovdqu(@X[-2&7],&QWP(-32,$inp));
- &vmovdqu(@X[-1&7],&QWP(-16,$inp));
- &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap
- &vpshufb(@X[-3&7],@X[-3&7],@X[2]);
- &vpshufb(@X[-2&7],@X[-2&7],@X[2]);
- &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot
- &vpshufb(@X[-1&7],@X[-1&7],@X[2]);
- &vpaddd (@X[0],@X[-4&7],@X[3]); # add K_00_19
- &vpaddd (@X[1],@X[-3&7],@X[3]);
- &vpaddd (@X[2],@X[-2&7],@X[3]);
- &vmovdqa(&QWP(0,"esp"),@X[0]); # X[]+K xfer to IALU
- &vmovdqa(&QWP(0+16,"esp"),@X[1]);
- &vmovdqa(&QWP(0+32,"esp"),@X[2]);
- &jmp (&label("loop"));
-
-sub Xupdate_avx_16_31()		# recall that $Xi starts with 4
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpaddd (@X[3],@X[3],@X[-1&7]);
- &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
- &vpsrldq(@X[2],@X[-1&7],4); # "X[-3]", 3 dwords
- eval(shift(@insns));
- eval(shift(@insns));
- &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@X[2],@X[2],@X[-2&7]); # "X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpsrld (@X[2],@X[0],31);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpslldq(@X[4],@X[0],12); # "X[0]"<<96, extract one dword
- &vpaddd (@X[0],@X[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpsrld (@X[3],@X[4],30);
- &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=1
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpslld (@X[4],@X[4],2);
- &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
- &vpxor (@X[0],@X[0],@X[3]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@X[4]); # "X[0]"^=("X[0]"<<96)<<<2
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX
- eval(shift(@insns));
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xupdate_avx_32_79()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
- my ($a,$b,$c,$d,$e);
-
- &vpalignr(@X[2],@X[-1&7],@X[-2&7],8); # compose "X[-6]"
- &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
- &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns));
- if ($Xi%5) {
- &vmovdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX...
- } else { # ... or load next one
- &vmovdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp"));
- }
- &vpaddd (@X[3],@X[3],@X[-1&7]);
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-6]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &vpsrld (@X[2],@X[0],30);
- &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpslld (@X[0],@X[0],2);
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=2
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xuplast_avx_80()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- &vpaddd (@X[3],@X[3],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU
-
- foreach (@insns) { eval; } # remaining instructions
-
- &mov ($inp=@T[1],&DWP(192+4,"esp"));
- &cmp ($inp,&DWP(192+8,"esp"));
- &je (&label("done"));
-
- &vmovdqa(@X[3],&QWP(112+48,"esp")); # K_00_19
- &vmovdqa(@X[2],&QWP(112+64,"esp")); # pbswap mask
- &vmovdqu(@X[-4&7],&QWP(0,$inp)); # load input
- &vmovdqu(@X[-3&7],&QWP(16,$inp));
- &vmovdqu(@X[-2&7],&QWP(32,$inp));
- &vmovdqu(@X[-1&7],&QWP(48,$inp));
- &add ($inp,64);
- &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap
- &mov (&DWP(192+4,"esp"),$inp);
- &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot
-
- $Xi=0;
-}
-
-sub Xloop_avx()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &vpshufb (@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@X[3]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa (&QWP(0+16*$Xi,"esp"),@X[$Xi&7]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- foreach (@insns) { eval; }
- $Xi++;
-}
-
-sub Xtail_avx()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- foreach (@insns) { eval; }
-}
-
-&set_label("loop",16);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_32_79(\&body_00_19);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xuplast_avx_80(\&body_20_39); # can jump to "done"
-
- $saved_j=$j; @saved_V=@V;
-
- &Xloop_avx(\&body_20_39);
- &Xloop_avx(\&body_20_39);
- &Xloop_avx(\&body_20_39);
-
- &mov (@T[1],&DWP(192,"esp")); # update context
- &add ($A,&DWP(0,@T[1]));
- &add (@T[0],&DWP(4,@T[1])); # $b
- &add ($C,&DWP(8,@T[1]));
- &mov (&DWP(0,@T[1]),$A);
- &add ($D,&DWP(12,@T[1]));
- &mov (&DWP(4,@T[1]),@T[0]);
- &add ($E,&DWP(16,@T[1]));
- &mov (&DWP(8,@T[1]),$C);
- &mov ($B,@T[0]);
- &mov (&DWP(12,@T[1]),$D);
- &mov (&DWP(16,@T[1]),$E);
-
- &jmp (&label("loop"));
-
-&set_label("done",16); $j=$saved_j; @V=@saved_V;
-
- &Xtail_avx(\&body_20_39);
- &Xtail_avx(\&body_20_39);
- &Xtail_avx(\&body_20_39);
-
- &vzeroall();
-
- &mov (@T[1],&DWP(192,"esp")); # update context
- &add ($A,&DWP(0,@T[1]));
- &mov ("esp",&DWP(192+12,"esp")); # restore %esp
- &add (@T[0],&DWP(4,@T[1])); # $b
- &add ($C,&DWP(8,@T[1]));
- &mov (&DWP(0,@T[1]),$A);
- &add ($D,&DWP(12,@T[1]));
- &mov (&DWP(4,@T[1]),@T[0]);
- &add ($E,&DWP(16,@T[1]));
- &mov (&DWP(8,@T[1]),$C);
- &mov (&DWP(12,@T[1]),$D);
- &mov (&DWP(16,@T[1]),$E);
-&function_end("_sha1_block_data_order_avx");
-}
-&set_label("K_XX_XX",64);
-&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
-&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39
-&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59
-&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79
-&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask
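-# (stored little-endian, the first mask dword reads as bytes 03,02,01,00,
-# so pshufb/vpshufb with this mask reverses each 32-bit lane, i.e.
-# performs the byte swap)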
-}
-&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
diff --git a/app/openssl/crypto/sha/asm/sha1-alpha.pl b/app/openssl/crypto/sha/asm/sha1-alpha.pl
deleted file mode 100644
index 6c4b9251..00000000
--- a/app/openssl/crypto/sha/asm/sha1-alpha.pl
+++ /dev/null
@@ -1,322 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA1 block procedure for Alpha.
-
-# On 21264 performance is 33% better than code generated by the vendor
-# compiler and 75% better than GCC [3.4]; in absolute terms it is
-# 8.7 cycles per processed byte. The implementation features vectorized
-# byte swap, but not Xupdate.
-
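-# The per-word effect of the vectorized byte swap mentioned above is the
-# usual 32-bit bswap; as a stand-alone reference (illustrative helper,
-# not used by this module):
-sub bswap32 {
-    my $x = shift;
-    return (($x >> 24) & 0x000000ff) | (($x >>  8) & 0x0000ff00) |
-	   (($x <<  8) & 0x00ff0000) | (($x << 24) & 0xff000000);
-}
-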
-@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
- "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
-$ctx="a0"; # $16
-$inp="a1";
-$num="a2";
-$A="a3";
-$B="a4"; # 20
-$C="a5";
-$D="t8";
-$E="t9"; @V=($A,$B,$C,$D,$E);
-$t0="t10"; # 24
-$t1="t11";
-$t2="ra";
-$t3="t12";
-$K="AT"; # 28
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i==0);
- ldq_u @X[0],0+0($inp)
- ldq_u @X[1],0+7($inp)
-___
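-# (ldq_u plus extql/extqh below is Alpha's standard unaligned-load idiom:
-# both fetches are aligned, and the two halves are then shifted into
-# place by the low address bits of $inp and merged with or)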
-$code.=<<___ if (!($i&1) && $i<14);
- ldq_u @X[$i+2],($i+2)*4+0($inp)
- ldq_u @X[$i+3],($i+2)*4+7($inp)
-___
-$code.=<<___ if (!($i&1) && $i<15);
- extql @X[$i],$inp,@X[$i]
- extqh @X[$i+1],$inp,@X[$i+1]
-
-	or	@X[$i+1],@X[$i],@X[$i]	# a pair of 32-bit values is fetched
-
- srl @X[$i],24,$t0 # vectorized byte swap
- srl @X[$i],8,$t2
-
- sll @X[$i],8,$t3
- sll @X[$i],24,@X[$i]
- zapnot $t0,0x11,$t0
- zapnot $t2,0x22,$t2
-
- zapnot @X[$i],0x88,@X[$i]
- or $t0,$t2,$t0
- zapnot $t3,0x44,$t3
- sll $a,5,$t1
-
- or @X[$i],$t0,@X[$i]
- addl $K,$e,$e
- and $b,$c,$t2
- zapnot $a,0xf,$a
-
- or @X[$i],$t3,@X[$i]
- srl $a,27,$t0
- bic $d,$b,$t3
- sll $b,30,$b
-
- extll @X[$i],4,@X[$i+1] # extract upper half
- or $t2,$t3,$t2
- addl @X[$i],$e,$e
-
- addl $t1,$e,$e
- srl $b,32,$t3
- zapnot @X[$i],0xf,@X[$i]
-
- addl $t0,$e,$e
- addl $t2,$e,$e
- or $t3,$b,$b
-___
-$code.=<<___ if (($i&1) && $i<15);
- sll $a,5,$t1
- addl $K,$e,$e
- and $b,$c,$t2
- zapnot $a,0xf,$a
-
- srl $a,27,$t0
- addl @X[$i%16],$e,$e
- bic $d,$b,$t3
- sll $b,30,$b
-
- or $t2,$t3,$t2
- addl $t1,$e,$e
- srl $b,32,$t3
- zapnot @X[$i],0xf,@X[$i]
-
- addl $t0,$e,$e
- addl $t2,$e,$e
- or $t3,$b,$b
-___
-$code.=<<___ if ($i>=15); # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- and $b,$c,$t2
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
-
- zapnot $a,0xf,$a
- addl @X[$i%16],$e,$e
- bic $d,$b,$t3
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
-
- srl $a,27,$t0
- addl $t1,$e,$e
- or $t2,$t3,$t2
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
-
- sll $b,30,$b
- addl $t0,$e,$e
- srl @X[$j%16],31,$t1
-
- addl $t2,$e,$e
- srl $b,32,$t3
- addl @X[$j%16],@X[$j%16],@X[$j%16]
-
- or $t3,$b,$b
- zapnot @X[$i%16],0xf,@X[$i%16]
- or $t1,@X[$j%16],@X[$j%16]
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<79); # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
-
- sll $b,30,$t3
- addl $t1,$e,$e
- xor $b,$c,$t2
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
-
- srl $b,2,$b
- addl @X[$i%16],$e,$e
- xor $d,$t2,$t2
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
-
- srl @X[$j%16],31,$t1
- addl $t2,$e,$e
- srl $a,27,$t0
- addl @X[$j%16],@X[$j%16],@X[$j%16]
-
- or $t3,$b,$b
- addl $t0,$e,$e
- or $t1,@X[$j%16],@X[$j%16]
-___
-$code.=<<___ if ($i<77);
- zapnot @X[$i%16],0xf,@X[$i%16]
-___
-$code.=<<___ if ($i==79); # with context fetch
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- ldl @X[0],0($ctx)
-
- sll $b,30,$t3
- addl $t1,$e,$e
- xor $b,$c,$t2
- ldl @X[1],4($ctx)
-
- srl $b,2,$b
- addl @X[$i%16],$e,$e
- xor $d,$t2,$t2
- ldl @X[2],8($ctx)
-
- srl $a,27,$t0
- addl $t2,$e,$e
- ldl @X[3],12($ctx)
-
- or $t3,$b,$b
- addl $t0,$e,$e
- ldl @X[4],16($ctx)
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___; # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
-
- srl $a,27,$t0
- and $b,$c,$t2
- and $b,$d,$t3
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
-
- sll $b,30,$b
- addl $t1,$e,$e
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
-
- srl @X[$j%16],31,$t1
- addl $t0,$e,$e
- or $t2,$t3,$t2
- and $c,$d,$t3
-
- or $t2,$t3,$t2
- srl $b,32,$t3
- addl @X[$i%16],$e,$e
- addl @X[$j%16],@X[$j%16],@X[$j%16]
-
- or $t3,$b,$b
- addl $t2,$e,$e
- or $t1,@X[$j%16],@X[$j%16]
- zapnot @X[$i%16],0xf,@X[$i%16]
-___
-}
-
-$code=<<___;
-#ifdef __linux__
-#include <asm/regdef.h>
-#else
-#include <asm.h>
-#include <regdef.h>
-#endif
-
-.text
-
-.set noat
-.set noreorder
-.globl sha1_block_data_order
-.align 5
-.ent sha1_block_data_order
-sha1_block_data_order:
- lda sp,-64(sp)
- stq ra,0(sp)
- stq s0,8(sp)
- stq s1,16(sp)
- stq s2,24(sp)
- stq s3,32(sp)
- stq s4,40(sp)
- stq s5,48(sp)
- stq fp,56(sp)
- .mask 0x0400fe00,-64
- .frame sp,64,ra
- .prologue 0
-
- ldl $A,0($ctx)
- ldl $B,4($ctx)
- sll $num,6,$num
- ldl $C,8($ctx)
- ldl $D,12($ctx)
- ldl $E,16($ctx)
- addq $inp,$num,$num
-
-.Lloop:
- .set noreorder
- ldah $K,23170(zero)
- zapnot $B,0xf,$B
- lda $K,31129($K) # K_00_19
-___
-for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- ldah $K,28378(zero)
- lda $K,-5215($K) # K_20_39
-___
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- ldah $K,-28900(zero)
- lda $K,-17188($K) # K_40_59
-___
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- ldah $K,-13725(zero)
- lda $K,-15914($K) # K_60_79
-___
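-# Each ldah/lda pair above composes a 32-bit K constant from two 16-bit
-# halves; since lda sign-extends, the high half is biased up by one
-# whenever the low half lands in the negative range. A stand-alone check
-# of the splits (illustration only, kept out of the generator's output):
-#
-#	for my $k (0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6) {
-#	    my $lo = $k & 0xffff;      $lo -= 0x10000 if ($lo >= 0x8000);
-#	    my $hi = ($k - $lo) >> 16; $hi -= 0x10000 if ($hi >= 0x8000);
-#	    printf("ldah %d, lda %d -> 0x%08x\n", $hi, $lo,
-#		   ($hi*65536 + $lo) & 0xffffffff);
-#	}
-#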
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- addl @X[0],$A,$A
- addl @X[1],$B,$B
- addl @X[2],$C,$C
- addl @X[3],$D,$D
- addl @X[4],$E,$E
- stl $A,0($ctx)
- stl $B,4($ctx)
- addq $inp,64,$inp
- stl $C,8($ctx)
- stl $D,12($ctx)
- stl $E,16($ctx)
- cmpult $inp,$num,$t1
- bne $t1,.Lloop
-
- .set noreorder
- ldq ra,0(sp)
- ldq s0,8(sp)
- ldq s1,16(sp)
- ldq s2,24(sp)
- ldq s3,32(sp)
- ldq s4,40(sp)
- ldq s5,48(sp)
- ldq fp,56(sp)
- lda sp,64(sp)
- ret (ra)
-.end sha1_block_data_order
-.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-$output=shift and open STDOUT,">$output";
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.S b/app/openssl/crypto/sha/asm/sha1-armv4-large.S
deleted file mode 100644
index a1562883..00000000
--- a/app/openssl/crypto/sha/asm/sha1-armv4-large.S
+++ /dev/null
@@ -1,1450 +0,0 @@
-#include "arm_arch.h"
-
-.text
-.code 32
-
-.global sha1_block_data_order
-.type sha1_block_data_order,%function
-
-.align 5
-sha1_block_data_order:
-#if __ARM_ARCH__>=7
- sub r3,pc,#8 @ sha1_block_data_order
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#ARMV8_SHA1
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
- stmdb sp!,{r4-r12,lr}
- add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
- ldmia r0,{r3,r4,r5,r6,r7}
-.Lloop:
- ldr r8,.LK_00_19
- mov r14,sp
- sub sp,sp,#15*4
- mov r5,r5,ror#30
- mov r6,r6,ror#30
- mov r7,r7,ror#30 @ [6]
-.L_00_15:
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r7,r8,r7,ror#2 @ E+=K_00_19
- eor r10,r5,r6 @ F_xx_xx
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r4,r10,ror#2
- add r7,r7,r9 @ E+=X[i]
- eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r7,r7,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r6,r8,r6,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r4,r5 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r6,r8,r6,ror#2 @ E+=K_00_19
- eor r10,r4,r5 @ F_xx_xx
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r3,r10,ror#2
- add r6,r6,r9 @ E+=X[i]
- eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r6,r6,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r5,r8,r5,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r3,r4 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r5,r8,r5,ror#2 @ E+=K_00_19
- eor r10,r3,r4 @ F_xx_xx
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r7,r10,ror#2
- add r5,r5,r9 @ E+=X[i]
- eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r5,r5,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r4,r8,r4,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r7,r3 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r4,r8,r4,ror#2 @ E+=K_00_19
- eor r10,r7,r3 @ F_xx_xx
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r6,r10,ror#2
- add r4,r4,r9 @ E+=X[i]
- eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r4,r4,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r3,r8,r3,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r6,r7 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r3,r8,r3,ror#2 @ E+=K_00_19
- eor r10,r6,r7 @ F_xx_xx
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r5,r10,ror#2
- add r3,r3,r9 @ E+=X[i]
- eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r3,r3,r10 @ E+=F_00_19(B,C,D)
- teq r14,sp
- bne .L_00_15 @ [((11+4)*5+2)*3]
- sub sp,sp,#25*4
-#if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
-#else
- ldr r9,[r1],#4 @ handles unaligned
- add r7,r8,r7,ror#2 @ E+=K_00_19
- eor r10,r5,r6 @ F_xx_xx
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev r9,r9 @ byte swap
-#endif
-#endif
- and r10,r4,r10,ror#2
- add r7,r7,r9 @ E+=X[i]
- eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r7,r7,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r3,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
- add r6,r6,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r7,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
- add r5,r5,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r6,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
- add r4,r4,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r5,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
- add r3,r3,r10 @ E+=F_00_19(B,C,D)
-
- ldr r8,.LK_20_39 @ [+15+16*4]
- cmn sp,#0 @ [+3], clear carry to denote 20_39
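-	@ (carry doubles as a phase marker so rounds 20-39 and 60-79 share
-	@ one loop: cmn clears C here, the cmp before the second entry sets
-	@ it, teq below leaves C intact, and bcs exits only in the 60_79 pass)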
-.L_20_39_or_60_79:
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r7,r8,r7,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r5,r6 @ F_xx_xx
- mov r9,r9,ror#31
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r4,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r7,r7,r9 @ E+=X[i]
- add r7,r7,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r3,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- add r6,r6,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r7,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- add r5,r5,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r6,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- add r4,r4,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r5,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- add r3,r3,r10 @ E+=F_20_39(B,C,D)
- teq r14,sp @ preserve carry
- bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
- bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
-
- ldr r8,.LK_40_59
- sub sp,sp,#20*4 @ [+2]
-.L_40_59:
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r7,r8,r7,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r5,r6 @ F_xx_xx
- mov r9,r9,ror#31
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r4,r10,ror#2 @ F_xx_xx
- and r11,r5,r6 @ F_xx_xx
- add r7,r7,r9 @ E+=X[i]
- add r7,r7,r10 @ E+=F_40_59(B,C,D)
- add r7,r7,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r3,r10,ror#2 @ F_xx_xx
- and r11,r4,r5 @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- add r6,r6,r10 @ E+=F_40_59(B,C,D)
- add r6,r6,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r7,r10,ror#2 @ F_xx_xx
- and r11,r3,r4 @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- add r5,r5,r10 @ E+=F_40_59(B,C,D)
- add r5,r5,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r6,r10,ror#2 @ F_xx_xx
- and r11,r7,r3 @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- add r4,r4,r10 @ E+=F_40_59(B,C,D)
- add r4,r4,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r5,r10,ror#2 @ F_xx_xx
- and r11,r6,r7 @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- add r3,r3,r10 @ E+=F_40_59(B,C,D)
- add r3,r3,r11,ror#2
- teq r14,sp
- bne .L_40_59 @ [+((12+5)*5+2)*4]
-
- ldr r8,.LK_60_79
- sub sp,sp,#20*4
- cmp sp,#0 @ set carry to denote 60_79
- b .L_20_39_or_60_79 @ [+4], spare 300 bytes
-.L_done:
- add sp,sp,#80*4 @ "deallocate" stack frame
- ldmia r0,{r8,r9,r10,r11,r12}
- add r3,r8,r3
- add r4,r9,r4
- add r5,r10,r5,ror#2
- add r6,r11,r6,ror#2
- add r7,r12,r7,ror#2
- stmia r0,{r3,r4,r5,r6,r7}
- teq r1,r2
- bne .Lloop @ [+18], total 1307
-
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size sha1_block_data_order,.-sha1_block_data_order
-
-.align 5
-.LK_00_19: .word 0x5a827999
-.LK_20_39: .word 0x6ed9eba1
-.LK_40_59: .word 0x8f1bbcdc
-.LK_60_79: .word 0xca62c1d6
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha1_block_data_order
-.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 5
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.type sha1_block_data_order_neon,%function
-.align 4
-sha1_block_data_order_neon:
-.LNEON:
- stmdb sp!,{r4-r12,lr}
- add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
- @ dmb @ errata #451034 on early Cortex A8
- @ vstmdb sp!,{d8-d15} @ ABI specification says so
- mov r14,sp
- sub sp,sp,#64 @ alloca
- adr r8,.LK_00_19
- bic sp,sp,#15 @ align for 128-bit stores
-
- ldmia r0,{r3,r4,r5,r6,r7} @ load context
- mov r12,sp
-
- vld1.8 {q0-q1},[r1]! @ handles unaligned
- veor q15,q15,q15
- vld1.8 {q2-q3},[r1]!
- vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19
- vrev32.8 q0,q0 @ yes, even on
- vrev32.8 q1,q1 @ big-endian...
- vrev32.8 q2,q2
- vadd.i32 q8,q0,q14
- vrev32.8 q3,q3
- vadd.i32 q9,q1,q14
- vst1.32 {q8},[r12,:128]!
- vadd.i32 q10,q2,q14
- vst1.32 {q9},[r12,:128]!
- vst1.32 {q10},[r12,:128]!
- ldr r9,[sp] @ big RAW stall
-
-.Loop_neon:
- vext.8 q8,q0,q1,#8
- bic r10,r6,r4
- add r7,r7,r9
- and r11,r5,r4
- vadd.i32 q13,q3,q14
- ldr r9,[sp,#4]
- add r7,r7,r3,ror#27
- vext.8 q12,q3,q15,#4
- eor r11,r11,r10
- mov r4,r4,ror#2
- add r7,r7,r11
- veor q8,q8,q0
- bic r10,r5,r3
- add r6,r6,r9
- veor q12,q12,q2
- and r11,r4,r3
- ldr r9,[sp,#8]
- veor q12,q12,q8
- add r6,r6,r7,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q13,q15,q12,#4
- bic r10,r4,r7
- add r5,r5,r9
- vadd.i32 q8,q12,q12
- and r11,r3,r7
- ldr r9,[sp,#12]
- vsri.32 q8,q12,#31
- add r5,r5,r6,ror#27
- eor r11,r11,r10
- mov r7,r7,ror#2
- vshr.u32 q12,q13,#30
- add r5,r5,r11
- bic r10,r3,r6
- vshl.u32 q13,q13,#2
- add r4,r4,r9
- and r11,r7,r6
- veor q8,q8,q12
- ldr r9,[sp,#16]
- add r4,r4,r5,ror#27
- veor q8,q8,q13
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q9,q1,q2,#8
- bic r10,r7,r5
- add r3,r3,r9
- and r11,r6,r5
- vadd.i32 q13,q8,q14
- ldr r9,[sp,#20]
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r4,ror#27
- vext.8 q12,q8,q15,#4
- eor r11,r11,r10
- mov r5,r5,ror#2
- add r3,r3,r11
- veor q9,q9,q1
- bic r10,r6,r4
- add r7,r7,r9
- veor q12,q12,q3
- and r11,r5,r4
- ldr r9,[sp,#24]
- veor q12,q12,q9
- add r7,r7,r3,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q13,q15,q12,#4
- bic r10,r5,r3
- add r6,r6,r9
- vadd.i32 q9,q12,q12
- and r11,r4,r3
- ldr r9,[sp,#28]
- vsri.32 q9,q12,#31
- add r6,r6,r7,ror#27
- eor r11,r11,r10
- mov r3,r3,ror#2
- vshr.u32 q12,q13,#30
- add r6,r6,r11
- bic r10,r4,r7
- vshl.u32 q13,q13,#2
- add r5,r5,r9
- and r11,r3,r7
- veor q9,q9,q12
- ldr r9,[sp,#32]
- add r5,r5,r6,ror#27
- veor q9,q9,q13
- eor r11,r11,r10
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q10,q2,q3,#8
- bic r10,r3,r6
- add r4,r4,r9
- and r11,r7,r6
- vadd.i32 q13,q9,q14
- ldr r9,[sp,#36]
- add r4,r4,r5,ror#27
- vext.8 q12,q9,q15,#4
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- veor q10,q10,q2
- bic r10,r7,r5
- add r3,r3,r9
- veor q12,q12,q8
- and r11,r6,r5
- ldr r9,[sp,#40]
- veor q12,q12,q10
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q13,q15,q12,#4
- bic r10,r6,r4
- add r7,r7,r9
- vadd.i32 q10,q12,q12
- and r11,r5,r4
- ldr r9,[sp,#44]
- vsri.32 q10,q12,#31
- add r7,r7,r3,ror#27
- eor r11,r11,r10
- mov r4,r4,ror#2
- vshr.u32 q12,q13,#30
- add r7,r7,r11
- bic r10,r5,r3
- vshl.u32 q13,q13,#2
- add r6,r6,r9
- and r11,r4,r3
- veor q10,q10,q12
- ldr r9,[sp,#48]
- add r6,r6,r7,ror#27
- veor q10,q10,q13
- eor r11,r11,r10
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q11,q3,q8,#8
- bic r10,r4,r7
- add r5,r5,r9
- and r11,r3,r7
- vadd.i32 q13,q10,q14
- ldr r9,[sp,#52]
- add r5,r5,r6,ror#27
- vext.8 q12,q10,q15,#4
- eor r11,r11,r10
- mov r7,r7,ror#2
- add r5,r5,r11
- veor q11,q11,q3
- bic r10,r3,r6
- add r4,r4,r9
- veor q12,q12,q9
- and r11,r7,r6
- ldr r9,[sp,#56]
- veor q12,q12,q11
- add r4,r4,r5,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q13,q15,q12,#4
- bic r10,r7,r5
- add r3,r3,r9
- vadd.i32 q11,q12,q12
- and r11,r6,r5
- ldr r9,[sp,#60]
- vsri.32 q11,q12,#31
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- mov r5,r5,ror#2
- vshr.u32 q12,q13,#30
- add r3,r3,r11
- bic r10,r6,r4
- vshl.u32 q13,q13,#2
- add r7,r7,r9
- and r11,r5,r4
- veor q11,q11,q12
- ldr r9,[sp,#0]
- add r7,r7,r3,ror#27
- veor q11,q11,q13
- eor r11,r11,r10
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q10,q11,#8
- bic r10,r5,r3
- add r6,r6,r9
- and r11,r4,r3
- veor q0,q0,q8
- ldr r9,[sp,#4]
- add r6,r6,r7,ror#27
- veor q0,q0,q1
- eor r11,r11,r10
- mov r3,r3,ror#2
- vadd.i32 q13,q11,q14
- add r6,r6,r11
- bic r10,r4,r7
- veor q12,q12,q0
- add r5,r5,r9
- and r11,r3,r7
- vshr.u32 q0,q12,#30
- ldr r9,[sp,#8]
- add r5,r5,r6,ror#27
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- eor r11,r11,r10
- mov r7,r7,ror#2
- vsli.32 q0,q12,#2
- add r5,r5,r11
- bic r10,r3,r6
- add r4,r4,r9
- and r11,r7,r6
- ldr r9,[sp,#12]
- add r4,r4,r5,ror#27
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- bic r10,r7,r5
- add r3,r3,r9
- and r11,r6,r5
- ldr r9,[sp,#16]
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q11,q0,#8
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#20]
- veor q1,q1,q9
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- veor q1,q1,q2
- mov r4,r4,ror#2
- add r7,r7,r11
- vadd.i32 q13,q0,q14
- eor r10,r3,r5
- add r6,r6,r9
- veor q12,q12,q1
- ldr r9,[sp,#24]
- eor r11,r10,r4
- vshr.u32 q1,q12,#30
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- vst1.32 {q13},[r12,:128]!
- add r6,r6,r11
- eor r10,r7,r4
- vsli.32 q1,q12,#2
- add r5,r5,r9
- ldr r9,[sp,#28]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#32]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q12,q0,q1,#8
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#36]
- veor q2,q2,q10
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- veor q2,q2,q3
- mov r5,r5,ror#2
- add r3,r3,r11
- vadd.i32 q13,q1,q14
- eor r10,r4,r6
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r7,r7,r9
- veor q12,q12,q2
- ldr r9,[sp,#40]
- eor r11,r10,r5
- vshr.u32 q2,q12,#30
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- vst1.32 {q13},[r12,:128]!
- add r7,r7,r11
- eor r10,r3,r5
- vsli.32 q2,q12,#2
- add r6,r6,r9
- ldr r9,[sp,#44]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#48]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q12,q1,q2,#8
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#52]
- veor q3,q3,q11
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- veor q3,q3,q8
- mov r6,r6,ror#2
- add r4,r4,r11
- vadd.i32 q13,q2,q14
- eor r10,r5,r7
- add r3,r3,r9
- veor q12,q12,q3
- ldr r9,[sp,#56]
- eor r11,r10,r6
- vshr.u32 q3,q12,#30
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- vst1.32 {q13},[r12,:128]!
- add r3,r3,r11
- eor r10,r4,r6
- vsli.32 q3,q12,#2
- add r7,r7,r9
- ldr r9,[sp,#60]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#0]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q12,q2,q3,#8
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#4]
- veor q8,q8,q0
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- veor q8,q8,q9
- mov r7,r7,ror#2
- add r5,r5,r11
- vadd.i32 q13,q3,q14
- eor r10,r6,r3
- add r4,r4,r9
- veor q12,q12,q8
- ldr r9,[sp,#8]
- eor r11,r10,r7
- vshr.u32 q8,q12,#30
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- add r4,r4,r11
- eor r10,r5,r7
- vsli.32 q8,q12,#2
- add r3,r3,r9
- ldr r9,[sp,#12]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#16]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q3,q8,#8
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#20]
- veor q9,q9,q1
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- veor q9,q9,q10
- mov r3,r3,ror#2
- add r6,r6,r11
- vadd.i32 q13,q8,q14
- eor r10,r7,r4
- add r5,r5,r9
- veor q12,q12,q9
- ldr r9,[sp,#24]
- eor r11,r10,r3
- vshr.u32 q9,q12,#30
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- vst1.32 {q13},[r12,:128]!
- add r5,r5,r11
- eor r10,r6,r3
- vsli.32 q9,q12,#2
- add r4,r4,r9
- ldr r9,[sp,#28]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#32]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q8,q9,#8
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#36]
- veor q10,q10,q2
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- veor q10,q10,q11
- add r7,r7,r10
- and r11,r11,r4
- vadd.i32 q13,q9,q14
- mov r4,r4,ror#2
- add r7,r7,r11
- veor q12,q12,q10
- add r6,r6,r9
- and r10,r4,r5
- vshr.u32 q10,q12,#30
- ldr r9,[sp,#40]
- add r6,r6,r7,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r4,r5
- add r6,r6,r10
- vsli.32 q10,q12,#2
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#44]
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- add r5,r5,r10
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#48]
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- add r4,r4,r10
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q12,q9,q10,#8
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#52]
- veor q11,q11,q3
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- veor q11,q11,q0
- add r3,r3,r10
- and r11,r11,r5
- vadd.i32 q13,q10,q14
- mov r5,r5,ror#2
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r11
- veor q12,q12,q11
- add r7,r7,r9
- and r10,r5,r6
- vshr.u32 q11,q12,#30
- ldr r9,[sp,#56]
- add r7,r7,r3,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r5,r6
- add r7,r7,r10
- vsli.32 q11,q12,#2
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#60]
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- add r6,r6,r10
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#0]
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- add r5,r5,r10
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q12,q10,q11,#8
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#4]
- veor q0,q0,q8
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- veor q0,q0,q1
- add r4,r4,r10
- and r11,r11,r6
- vadd.i32 q13,q11,q14
- mov r6,r6,ror#2
- add r4,r4,r11
- veor q12,q12,q0
- add r3,r3,r9
- and r10,r6,r7
- vshr.u32 q0,q12,#30
- ldr r9,[sp,#8]
- add r3,r3,r4,ror#27
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- eor r11,r6,r7
- add r3,r3,r10
- vsli.32 q0,q12,#2
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#12]
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- add r7,r7,r10
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#16]
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- add r6,r6,r10
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q12,q11,q0,#8
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#20]
- veor q1,q1,q9
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- veor q1,q1,q2
- add r5,r5,r10
- and r11,r11,r7
- vadd.i32 q13,q0,q14
- mov r7,r7,ror#2
- add r5,r5,r11
- veor q12,q12,q1
- add r4,r4,r9
- and r10,r7,r3
- vshr.u32 q1,q12,#30
- ldr r9,[sp,#24]
- add r4,r4,r5,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r7,r3
- add r4,r4,r10
- vsli.32 q1,q12,#2
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#28]
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- add r3,r3,r10
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#32]
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- add r7,r7,r10
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q0,q1,#8
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#36]
- veor q2,q2,q10
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- veor q2,q2,q3
- add r6,r6,r10
- and r11,r11,r3
- vadd.i32 q13,q1,q14
- mov r3,r3,ror#2
- add r6,r6,r11
- veor q12,q12,q2
- add r5,r5,r9
- and r10,r3,r4
- vshr.u32 q2,q12,#30
- ldr r9,[sp,#40]
- add r5,r5,r6,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r3,r4
- add r5,r5,r10
- vsli.32 q2,q12,#2
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#44]
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- add r4,r4,r10
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#48]
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- add r3,r3,r10
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q1,q2,#8
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#52]
- veor q3,q3,q11
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- veor q3,q3,q8
- mov r4,r4,ror#2
- add r7,r7,r11
- vadd.i32 q13,q2,q14
- eor r10,r3,r5
- add r6,r6,r9
- veor q12,q12,q3
- ldr r9,[sp,#56]
- eor r11,r10,r4
- vshr.u32 q3,q12,#30
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- vst1.32 {q13},[r12,:128]!
- add r6,r6,r11
- eor r10,r7,r4
- vsli.32 q3,q12,#2
- add r5,r5,r9
- ldr r9,[sp,#60]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#0]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- vadd.i32 q13,q3,q14
- eor r10,r5,r7
- add r3,r3,r9
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
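-	@ if this was the final block (r1==r2), rewind r1 below so the
-	@ last 64 bytes are reloaded rather than reading past the end of
-	@ the input ("reload last block to avoid SEGV" in the perlasm)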
- teq r1,r2
- sub r8,r8,#16
- subeq r1,r1,#64
- vld1.8 {q0-q1},[r1]!
- ldr r9,[sp,#4]
- eor r11,r10,r6
- vld1.8 {q2-q3},[r1]!
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r11
- eor r10,r4,r6
- vrev32.8 q0,q0
- add r7,r7,r9
- ldr r9,[sp,#8]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#12]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#16]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- vrev32.8 q1,q1
- eor r10,r6,r3
- add r4,r4,r9
- vadd.i32 q8,q0,q14
- ldr r9,[sp,#20]
- eor r11,r10,r7
- vst1.32 {q8},[r12,:128]!
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#24]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#28]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#32]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- vrev32.8 q2,q2
- eor r10,r7,r4
- add r5,r5,r9
- vadd.i32 q9,q1,q14
- ldr r9,[sp,#36]
- eor r11,r10,r3
- vst1.32 {q9},[r12,:128]!
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#40]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#44]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#48]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- vrev32.8 q3,q3
- eor r10,r3,r5
- add r6,r6,r9
- vadd.i32 q10,q2,q14
- ldr r9,[sp,#52]
- eor r11,r10,r4
- vst1.32 {q10},[r12,:128]!
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#56]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#60]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- ldmia r0,{r9,r10,r11,r12} @ accumulate context
- add r3,r3,r9
- ldr r9,[r0,#16]
- add r4,r4,r10
- add r5,r5,r11
- add r6,r6,r12
- moveq sp,r14
- add r7,r7,r9
- ldrne r9,[sp]
- stmia r0,{r3,r4,r5,r6,r7}
- addne r12,sp,#3*16
- bne .Loop_neon
-
- @ vldmia sp!,{d8-d15}
- ldmia sp!,{r4-r12,pc}
-.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
-#endif
-#if __ARM_ARCH__>=7
-.type sha1_block_data_order_armv8,%function
-.align 5
-sha1_block_data_order_armv8:
-.LARMv8:
- vstmdb sp!,{d8-d15} @ ABI specification says so
-
- veor q1,q1,q1
- adr r3,.LK_00_19
- vld1.32 {q0},[r0]!
- vld1.32 {d2[0]},[r0]
- sub r0,r0,#16
- vld1.32 {d16[],d17[]},[r3,:32]!
- vld1.32 {d18[],d19[]},[r3,:32]!
- vld1.32 {d20[],d21[]},[r3,:32]!
- vld1.32 {d22[],d23[]},[r3,:32]
-
-.Loop_v8:
- vld1.8 {q4-q5},[r1]!
- vld1.8 {q6-q7},[r1]!
- vrev32.8 q4,q4
- vrev32.8 q5,q5
-
- vadd.i32 q12,q8,q4
- vrev32.8 q6,q6
- vmov q14,q0 @ offload
- subs r2,r2,#1
-
- vadd.i32 q13,q8,q5
- vrev32.8 q7,q7
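-	@ the .byte sequences below are hand-assembled ARMv7 SHA-1
-	@ crypto-extension instructions (sha1h/sha1c/sha1p/sha1m/
-	@ sha1su0/sha1su1), emitted as raw words for the benefit of
-	@ assemblers that predate them; see unsha1() in
-	@ sha1-armv4-large.pl below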
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0
- .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12
- vadd.i32 q12,q8,q6
- .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1
- .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13
- vadd.i32 q13,q8,q7
- .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
- .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2
- .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12
- vadd.i32 q12,q8,q4
- .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
- .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3
- .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13
- vadd.i32 q13,q9,q5
- .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
- .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4
- .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12
- vadd.i32 q12,q9,q6
- .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
- .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
- vadd.i32 q13,q9,q7
- .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
- .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6
- .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
- vadd.i32 q12,q9,q4
- .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
- .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
- vadd.i32 q13,q9,q5
- .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
- .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8
- .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
- vadd.i32 q12,q10,q6
- .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
- .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
- vadd.i32 q13,q10,q7
- .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
- .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10
- .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
- vadd.i32 q12,q10,q4
- .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
- .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11
- .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13
- vadd.i32 q13,q10,q5
- .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
- .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12
- .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
- vadd.i32 q12,q10,q6
- .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
- .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13
- .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13
- vadd.i32 q13,q11,q7
- .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
- .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14
- .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
- vadd.i32 q12,q11,q4
- .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
- .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
- vadd.i32 q13,q11,q5
- .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
- .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16
- .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
- vadd.i32 q12,q11,q6
- .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
- vadd.i32 q13,q11,q7
-
- .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18
- .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
-
- .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19
- .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
-
- vadd.i32 q1,q1,q2
- vadd.i32 q0,q0,q14
- bne .Loop_v8
-
- vst1.32 {q0},[r0]!
- vst1.32 {d2[0]},[r0]
-
- vldmia sp!,{d8-d15}
- bx lr @ bx lr
-.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
-#endif
-.comm OPENSSL_armcap_P,4,4
diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl b/app/openssl/crypto/sha/asm/sha1-armv4-large.pl
deleted file mode 100644
index 50bd07b3..00000000
--- a/app/openssl/crypto/sha/asm/sha1-armv4-large.pl
+++ /dev/null
@@ -1,678 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# sha1_block procedure for ARMv4.
-#
-# January 2007.
-
-# Size/performance trade-off
-# ====================================================================
-# impl size in bytes comp cycles[*] measured performance
-# ====================================================================
-# thumb 304 3212 4420
-# armv4-small 392/+29% 1958/+64% 2250/+96%
-# armv4-compact 740/+89% 1552/+26% 1840/+22%
-# armv4-large 1420/+92% 1307/+19% 1370/+34%[***]
-# full unroll ~5100/+260% ~1260/+4% ~1300/+5%
-# ====================================================================
-# thumb = same as 'small' but in Thumb instructions[**] and
-# with recurring code in two private functions;
-# small = detached Xload/update, loops are folded;
-# compact = detached Xload/update, 5x unroll;
-# large = interleaved Xload/update, 5x unroll;
-# full unroll = interleaved Xload/update, full unroll, estimated[!];
-#
-# [*] Manually counted instructions in "grand" loop body. Measured
-# performance is affected by prologue and epilogue overhead,
-# i-cache availability, branch penalties, etc.
-# [**]	While each Thumb instruction is half the size, Thumb
-#	instructions are not as diverse as ARM ones: e.g., there are
-#	only two arithmetic instructions with 3 arguments, no [fixed]
-#	rotate, and the addressing modes are limited. As a result it
-#	takes more instructions to do the same job in Thumb, so the
-#	code is never half the size and is always slower.
-# [***]	which is also ~35% better than compiler-generated code. A
-#	dual-issue Cortex A8 core was measured to process an input
-#	block in ~990 cycles.
-
-# August 2010.
-#
-# Rescheduling for dual-issue pipeline resulted in 13% improvement on
-# Cortex A8 core and in absolute terms ~870 cycles per input block
-# [or 13.6 cycles per byte].
-
-# February 2011.
-#
-# Profiler-assisted and platform-specific optimization resulted in 10%
-# improvement on Cortex A8 core and 12.2 cycles per byte.
-
-# September 2013.
-#
-# Add NEON implementation (see sha1-586.pl for background info). On
-# Cortex A8 it was measured to process one byte in 6.7 cycles or >80%
-# faster than integer-only code. Because [fully unrolled] NEON code
-# is ~2.5x larger and some redundant instructions are executed when
-# processing the last block, the improvement is not as big for the
-# smallest blocks, only ~30%. Snapdragon S4 is a tad faster, at 6.4
-# cycles per byte, which is also >80% faster than integer-only code.
-
-# May 2014.
-#
-# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-$ctx="r0";
-$inp="r1";
-$len="r2";
-$a="r3";
-$b="r4";
-$c="r5";
-$d="r6";
-$e="r7";
-$K="r8";
-$t0="r9";
-$t1="r10";
-$t2="r11";
-$t3="r12";
-$Xi="r14";
-@V=($a,$b,$c,$d,$e);
-
-sub Xupdate {
-my ($a,$b,$c,$d,$e,$opt1,$opt2)=@_;
-$code.=<<___;
- ldr $t0,[$Xi,#15*4]
- ldr $t1,[$Xi,#13*4]
- ldr $t2,[$Xi,#7*4]
- add $e,$K,$e,ror#2 @ E+=K_xx_xx
- ldr $t3,[$Xi,#2*4]
- eor $t0,$t0,$t1
- eor $t2,$t2,$t3 @ 1 cycle stall
- eor $t1,$c,$d @ F_xx_xx
- mov $t0,$t0,ror#31
- add $e,$e,$a,ror#27 @ E+=ROR(A,27)
- eor $t0,$t0,$t2,ror#31
- str $t0,[$Xi,#-4]!
- $opt1 @ F_xx_xx
- $opt2 @ F_xx_xx
- add $e,$e,$t0 @ E+=X[i]
-___
-}
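-# For reference, the step above implements the SHA-1 schedule recurrence
-#
-#	X[i] = ROL(X[i-3] ^ X[i-8] ^ X[i-14] ^ X[i-16], 1)
-#
-# with ROL(x,1) expressed as ror#31 and the four ldr offsets picking the
-# taps out of the 16-word circular window at $Xi. A hypothetical
-# plain-Perl model of one step (a sketch, not part of the module):
-#
-#	sub rol { my ($x,$n)=@_; (($x<<$n)|($x>>(32-$n))) & 0xffffffff }
-#	$X[$i & 15] = rol($X[($i-3) & 15] ^ $X[($i-8) & 15] ^
-#	                  $X[($i-14) & 15] ^ $X[$i & 15], 1);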
-
-sub BODY_00_15 {
-my ($a,$b,$c,$d,$e)=@_;
-$code.=<<___;
-#if __ARM_ARCH__<7
- ldrb $t1,[$inp,#2]
- ldrb $t0,[$inp,#3]
- ldrb $t2,[$inp,#1]
- add $e,$K,$e,ror#2 @ E+=K_00_19
- ldrb $t3,[$inp],#4
- orr $t0,$t0,$t1,lsl#8
- eor $t1,$c,$d @ F_xx_xx
- orr $t0,$t0,$t2,lsl#16
- add $e,$e,$a,ror#27 @ E+=ROR(A,27)
- orr $t0,$t0,$t3,lsl#24
-#else
- ldr $t0,[$inp],#4 @ handles unaligned
- add $e,$K,$e,ror#2 @ E+=K_00_19
- eor $t1,$c,$d @ F_xx_xx
- add $e,$e,$a,ror#27 @ E+=ROR(A,27)
-#ifdef __ARMEL__
- rev $t0,$t0 @ byte swap
-#endif
-#endif
- and $t1,$b,$t1,ror#2
- add $e,$e,$t0 @ E+=X[i]
- eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D)
- str $t0,[$Xi,#-4]!
- add $e,$e,$t1 @ E+=F_00_19(B,C,D)
-___
-}
-
-sub BODY_16_19 {
-my ($a,$b,$c,$d,$e)=@_;
- &Xupdate(@_,"and $t1,$b,$t1,ror#2");
-$code.=<<___;
- eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D)
- add $e,$e,$t1 @ E+=F_00_19(B,C,D)
-___
-}
-
-sub BODY_20_39 {
-my ($a,$b,$c,$d,$e)=@_;
- &Xupdate(@_,"eor $t1,$b,$t1,ror#2");
-$code.=<<___;
- add $e,$e,$t1 @ E+=F_20_39(B,C,D)
-___
-}
-
-sub BODY_40_59 {
-my ($a,$b,$c,$d,$e)=@_;
- &Xupdate(@_,"and $t1,$b,$t1,ror#2","and $t2,$c,$d");
-$code.=<<___;
- add $e,$e,$t1 @ E+=F_40_59(B,C,D)
- add $e,$e,$t2,ror#2
-___
-}
-
-$code=<<___;
-#include "arm_arch.h"
-
-.text
-.code 32
-
-.global sha1_block_data_order
-.type sha1_block_data_order,%function
-
-.align 5
-sha1_block_data_order:
-#if __ARM_ARCH__>=7
- sub r3,pc,#8 @ sha1_block_data_order
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#ARMV8_SHA1
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
- stmdb sp!,{r4-r12,lr}
- add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
- ldmia $ctx,{$a,$b,$c,$d,$e}
-.Lloop:
- ldr $K,.LK_00_19
- mov $Xi,sp
- sub sp,sp,#15*4
- mov $c,$c,ror#30
- mov $d,$d,ror#30
- mov $e,$e,ror#30 @ [6]
-.L_00_15:
-___
-for($i=0;$i<5;$i++) {
- &BODY_00_15(@V); unshift(@V,pop(@V));
-}
-$code.=<<___;
- teq $Xi,sp
- bne .L_00_15 @ [((11+4)*5+2)*3]
- sub sp,sp,#25*4
-___
- &BODY_00_15(@V); unshift(@V,pop(@V));
- &BODY_16_19(@V); unshift(@V,pop(@V));
- &BODY_16_19(@V); unshift(@V,pop(@V));
- &BODY_16_19(@V); unshift(@V,pop(@V));
- &BODY_16_19(@V); unshift(@V,pop(@V));
-$code.=<<___;
-
- ldr $K,.LK_20_39 @ [+15+16*4]
- cmn sp,#0 @ [+3], clear carry to denote 20_39
-.L_20_39_or_60_79:
-___
-for($i=0;$i<5;$i++) {
- &BODY_20_39(@V); unshift(@V,pop(@V));
-}
-$code.=<<___;
- teq $Xi,sp @ preserve carry
- bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
- bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
-
- ldr $K,.LK_40_59
- sub sp,sp,#20*4 @ [+2]
-.L_40_59:
-___
-for($i=0;$i<5;$i++) {
- &BODY_40_59(@V); unshift(@V,pop(@V));
-}
-$code.=<<___;
- teq $Xi,sp
- bne .L_40_59 @ [+((12+5)*5+2)*4]
-
- ldr $K,.LK_60_79
- sub sp,sp,#20*4
- cmp sp,#0 @ set carry to denote 60_79
- b .L_20_39_or_60_79 @ [+4], spare 300 bytes
-.L_done:
- add sp,sp,#80*4 @ "deallocate" stack frame
- ldmia $ctx,{$K,$t0,$t1,$t2,$t3}
- add $a,$K,$a
- add $b,$t0,$b
- add $c,$t1,$c,ror#2
- add $d,$t2,$d,ror#2
- add $e,$t3,$e,ror#2
- stmia $ctx,{$a,$b,$c,$d,$e}
- teq $inp,$len
- bne .Lloop @ [+18], total 1307
-
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- bx lr @ interoperable with Thumb ISA:-)
-#endif
-.size sha1_block_data_order,.-sha1_block_data_order
-
-.align 5
-.LK_00_19: .word 0x5a827999
-.LK_20_39: .word 0x6ed9eba1
-.LK_40_59: .word 0x8f1bbcdc
-.LK_60_79: .word 0xca62c1d6
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha1_block_data_order
-.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
-.align 5
-___
-#####################################################################
-# NEON stuff
-#
-{{{
-my @V=($a,$b,$c,$d,$e);
-my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14));
-my $Xi=4;
-my @X=map("q$_",(8..11,0..3));
-my @Tx=("q12","q13");
-my ($K,$zero)=("q14","q15");
-my $j=0;
-
-sub AUTOLOAD() # thunk [simplified] x86-style perlasm
-{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
- my $arg = pop;
- $arg = "#$arg" if ($arg*1 eq $arg);
- $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
-}
-
-sub body_00_19 () {
- (
- '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
- '&bic ($t0,$d,$b)',
- '&add ($e,$e,$Ki)', # e+=X[i]+K
- '&and ($t1,$c,$b)',
- '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
- '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
- '&eor ($t1,$t1,$t0)', # F_00_19
- '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
- '&add ($e,$e,$t1);'. # e+=F_00_19
- '$j++; unshift(@V,pop(@V));'
- )
-}
-sub body_20_39 () {
- (
- '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
- '&eor ($t0,$b,$d)',
- '&add ($e,$e,$Ki)', # e+=X[i]+K
- '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)',
- '&eor ($t1,$t0,$c)', # F_20_39
- '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
- '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
- '&add ($e,$e,$t1);'. # e+=F_20_39
- '$j++; unshift(@V,pop(@V));'
- )
-}
-sub body_40_59 () {
- (
- '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
- '&add ($e,$e,$Ki)', # e+=X[i]+K
- '&and ($t0,$c,$d)',
- '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
- '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
- '&eor ($t1,$c,$d)',
- '&add ($e,$e,$t0)',
- '&and ($t1,$t1,$b)',
- '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
- '&add ($e,$e,$t1);'. # e+=F_40_59
- '$j++; unshift(@V,pop(@V));'
- )
-}
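-# The three bodies above realize the standard SHA-1 round functions.
-# As a hedged reference (hypothetical Perl, assuming 32-bit inputs, not
-# emitted by this module):
-#
-#	sub F_00_19 { my ($b,$c,$d)=@_; ($b & $c) ^ (~$b & $d) }	# Ch
-#	sub F_20_39 { my ($b,$c,$d)=@_; $b ^ $c ^ $d }			# Parity
-#	sub F_40_59 { my ($b,$c,$d)=@_; ($c & $d) + (($c ^ $d) & $b) }	# Maj
-#
-# Since ($b&$c) and (~$b&$d) are bit-disjoint, as are ($c&$d) and
-# (($c^$d)&$b), EOR or even ADD can stand in for ORR, which is what
-# body_00_19 and body_40_59 exploit.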
-
-sub Xupdate_16_31 ()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e);
-
- &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@Tx[1],@X[-1&7],$K);
- eval(shift(@insns));
- &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
- eval(shift(@insns));
- &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
-	&veor		(@Tx[0],@Tx[0],@X[0]);		# "X[0]"^="X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
- &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
- eval(shift(@insns));
- eval(shift(@insns));
- &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@X[0],@Tx[0],@Tx[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 (@Tx[0],@Tx[1],30);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshl_u32 (@Tx[1],@Tx[1],2);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@X[0],@X[0],@Tx[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
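-# NEON has no 32-bit rotate, so ROL is composed from shifts above:
-# ROL(x,1) is vadd.i32 (x+x == x<<1) followed by vsri.32 #31, which
-# shifts x>>31 into the vacated bit, e.g. (reference only):
-#
-#	vadd.i32 q0,q12,q12		@ q0  = x<<1
-#	vsri.32  q0,q12,#31		@ q0 |= x>>31, i.e. ROL(x,1)
-#
-# and ROL(t,2) on the deferred word is vshr.u32 #30 plus vshl.u32 #2
-# (vsli.32 #2 in Xupdate_32_79 below) folded in with veor.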
-
-sub Xupdate_32_79 ()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e);
-
- &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
- &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@Tx[1],@X[-1&7],$K);
- eval(shift(@insns));
- &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
- eval(shift(@insns));
- &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]"
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 (@X[0],@Tx[0],30);
- eval(shift(@insns));
- eval(shift(@insns));
- &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
- &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xuplast_80 ()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e);
-
- &vadd_i32 (@Tx[1],@X[-1&7],$K);
- eval(shift(@insns));
- eval(shift(@insns));
- &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!");
- &sub ($Xfer,$Xfer,64);
-
- &teq ($inp,$len);
- &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
- &subeq ($inp,$inp,64); # reload last block to avoid SEGV
- &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
- eval(shift(@insns));
- eval(shift(@insns));
- &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!");
- eval(shift(@insns));
- eval(shift(@insns));
- &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19
- eval(shift(@insns));
- eval(shift(@insns));
- &vrev32_8 (@X[-4&7],@X[-4&7]);
-
- foreach (@insns) { eval; } # remaining instructions
-
- $Xi=0;
-}
-
-sub Xloop()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e);
-
- &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K);
- eval(shift(@insns));
- eval(shift(@insns));
- &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU
-
- foreach (@insns) { eval; }
-
- $Xi++;
-}
-
-$code.=<<___;
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.type sha1_block_data_order_neon,%function
-.align 4
-sha1_block_data_order_neon:
-.LNEON:
- stmdb sp!,{r4-r12,lr}
- add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
- @ dmb @ errata #451034 on early Cortex A8
- @ vstmdb sp!,{d8-d15} @ ABI specification says so
- mov $saved_sp,sp
- sub sp,sp,#64 @ alloca
- adr $K_XX_XX,.LK_00_19
- bic sp,sp,#15 @ align for 128-bit stores
-
- ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
- mov $Xfer,sp
-
- vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
- veor $zero,$zero,$zero
- vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]!
- vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19
- vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on
- vrev32.8 @X[-3&7],@X[-3&7] @ big-endian...
- vrev32.8 @X[-2&7],@X[-2&7]
- vadd.i32 @X[0],@X[-4&7],$K
- vrev32.8 @X[-1&7],@X[-1&7]
- vadd.i32 @X[1],@X[-3&7],$K
- vst1.32 {@X[0]},[$Xfer,:128]!
- vadd.i32 @X[2],@X[-2&7],$K
- vst1.32 {@X[1]},[$Xfer,:128]!
- vst1.32 {@X[2]},[$Xfer,:128]!
- ldr $Ki,[sp] @ big RAW stall
-
-.Loop_neon:
-___
- &Xupdate_16_31(\&body_00_19);
- &Xupdate_16_31(\&body_00_19);
- &Xupdate_16_31(\&body_00_19);
- &Xupdate_16_31(\&body_00_19);
- &Xupdate_32_79(\&body_00_19);
- &Xupdate_32_79(\&body_20_39);
- &Xupdate_32_79(\&body_20_39);
- &Xupdate_32_79(\&body_20_39);
- &Xupdate_32_79(\&body_20_39);
- &Xupdate_32_79(\&body_20_39);
- &Xupdate_32_79(\&body_40_59);
- &Xupdate_32_79(\&body_40_59);
- &Xupdate_32_79(\&body_40_59);
- &Xupdate_32_79(\&body_40_59);
- &Xupdate_32_79(\&body_40_59);
- &Xupdate_32_79(\&body_20_39);
- &Xuplast_80(\&body_20_39);
- &Xloop(\&body_20_39);
- &Xloop(\&body_20_39);
- &Xloop(\&body_20_39);
-$code.=<<___;
- ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context
- add $a,$a,$Ki
- ldr $Ki,[$ctx,#16]
- add $b,$b,$t0
- add $c,$c,$t1
- add $d,$d,$Xfer
- moveq sp,$saved_sp
- add $e,$e,$Ki
- ldrne $Ki,[sp]
- stmia $ctx,{$a,$b,$c,$d,$e}
- addne $Xfer,sp,#3*16
- bne .Loop_neon
-
- @ vldmia sp!,{d8-d15}
- ldmia sp!,{r4-r12,pc}
-.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
-#endif
-___
-}}}
-#####################################################################
-# ARMv8 stuff
-#
-{{{
-my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3));
-my @MSG=map("q$_",(4..7));
-my @Kxx=map("q$_",(8..11));
-my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
-
-$code.=<<___;
-#if __ARM_ARCH__>=7
-.type sha1_block_data_order_armv8,%function
-.align 5
-sha1_block_data_order_armv8:
-.LARMv8:
- vstmdb sp!,{d8-d15} @ ABI specification says so
-
- veor $E,$E,$E
- adr r3,.LK_00_19
- vld1.32 {$ABCD},[$ctx]!
- vld1.32 {$E\[0]},[$ctx]
- sub $ctx,$ctx,#16
- vld1.32 {@Kxx[0]\[]},[r3,:32]!
- vld1.32 {@Kxx[1]\[]},[r3,:32]!
- vld1.32 {@Kxx[2]\[]},[r3,:32]!
- vld1.32 {@Kxx[3]\[]},[r3,:32]
-
-.Loop_v8:
- vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
- vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
- vrev32.8 @MSG[0],@MSG[0]
- vrev32.8 @MSG[1],@MSG[1]
-
- vadd.i32 $W0,@Kxx[0],@MSG[0]
- vrev32.8 @MSG[2],@MSG[2]
- vmov $ABCD_SAVE,$ABCD @ offload
- subs $len,$len,#1
-
- vadd.i32 $W1,@Kxx[0],@MSG[1]
- vrev32.8 @MSG[3],@MSG[3]
- sha1h $E1,$ABCD @ 0
- sha1c $ABCD,$E,$W0
- vadd.i32 $W0,@Kxx[$j],@MSG[2]
- sha1su0 @MSG[0],@MSG[1],@MSG[2]
-___
-for ($j=0,$i=1;$i<20-3;$i++) {
-my $f=("c","p","m","p")[$i/5];
-$code.=<<___;
- sha1h $E0,$ABCD @ $i
- sha1$f $ABCD,$E1,$W1
- vadd.i32 $W1,@Kxx[$j],@MSG[3]
- sha1su1 @MSG[0],@MSG[3]
-___
-$code.=<<___ if ($i<20-4);
- sha1su0 @MSG[1],@MSG[2],@MSG[3]
-___
- ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
- push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
-}
-$code.=<<___;
- sha1h $E0,$ABCD @ $i
- sha1p $ABCD,$E1,$W1
- vadd.i32 $W1,@Kxx[$j],@MSG[3]
-
- sha1h $E1,$ABCD @ 18
- sha1p $ABCD,$E0,$W0
-
- sha1h $E0,$ABCD @ 19
- sha1p $ABCD,$E1,$W1
-
- vadd.i32 $E,$E,$E0
- vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
- bne .Loop_v8
-
- vst1.32 {$ABCD},[$ctx]!
- vst1.32 {$E\[0]},[$ctx]
-
- vldmia sp!,{d8-d15}
- ret @ bx lr
-.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
-#endif
-___
-}}}
-$code.=<<___;
-.comm OPENSSL_armcap_P,4,4
-___
-
-{ my %opcode = (
- "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40,
- "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40,
- "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 );
-
- sub unsha1 {
- my ($mnemonic,$arg)=@_;
-
- if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
- my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
- |(($2&7)<<17)|(($2&8)<<4)
- |(($3&7)<<1) |(($3&8)<<2);
-	# ARMv7 instructions are always encoded little-endian, so the
-	# bytes are emitted in that order. The correct solution would be
-	# the .inst directive, but older assemblers don't implement it:-(
- sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
- $word&0xff,($word>>8)&0xff,
- ($word>>16)&0xff,($word>>24)&0xff,
- $mnemonic,$arg;
- }
- }
-}
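-# Worked example of the encoder above: "sha1h q3,q0" matches the regexp
-# with $1=3 and $3=0, so $word = 0xf3b902c0|((3&7)<<13) = 0xf3b962c0,
-# emitted little-endian as ".byte 0xc0,0x62,0xb9,0xf3" -- precisely the
-# encoding visible in the generated sha1-armv4-large.S above.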
-
-foreach (split($/,$code)) {
- s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or
- s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo;
-
- s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;
-
- s/\bret\b/bx lr/o or
- s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4
-
- print $_,$/;
-}
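-# E.g. the first substitution rewrites "{q14[]}" as "{d28[],d29[]}",
-# since each q register aliases a pair of d registers; that is why the
-# generated code loads K via "vld1.32 {d28[],d29[]}".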
-
-close STDOUT; # enforce flush
diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.S b/app/openssl/crypto/sha/asm/sha1-armv8.S
deleted file mode 100644
index f9d12625..00000000
--- a/app/openssl/crypto/sha/asm/sha1-armv8.S
+++ /dev/null
@@ -1,1211 +0,0 @@
-#include "arm_arch.h"
-
-.text
-
-.globl sha1_block_data_order
-.type sha1_block_data_order,%function
-.align 6
-sha1_block_data_order:
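-// The .LOPENSSL_armcap_P literal presumably stores the offset of
-// OPENSSL_armcap_P from the literal itself (cf. the analogous
-// .LOPENSSL_armcap word in the ARMv4 file): ldr fetches that offset,
-// adr takes the literal's own run-time address, and the add forms the
-// absolute address -- a position-independent capability probe for the
-// ARMv8 SHA-1 extension.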
- ldr x16,.LOPENSSL_armcap_P
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA1
- b.ne .Lv8_entry
-
- stp x29,x30,[sp,#-96]!
- add x29,sp,#0
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
-
- ldp w20,w21,[x0]
- ldp w22,w23,[x0,#8]
- ldr w24,[x0,#16]
-
-.Loop:
- ldr x3,[x1],#64
- movz w28,#0x7999
- sub x2,x2,#1
- movk w28,#0x5a82,lsl#16
-#ifdef __ARMEB__
- ror x3,x3,#32
-#else
- rev32 x3,x3
-#endif
- add w24,w24,w28 // warm it up
- add w24,w24,w3
- lsr x4,x3,#32
- ldr x5,[x1,#-56]
- bic w25,w23,w21
- and w26,w22,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- orr w25,w25,w26
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- add w23,w23,w4 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x5,x5,#32
-#else
- rev32 x5,x5
-#endif
- bic w25,w22,w20
- and w26,w21,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- orr w25,w25,w26
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- add w22,w22,w5 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- lsr x6,x5,#32
- ldr x7,[x1,#-48]
- bic w25,w21,w24
- and w26,w20,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- orr w25,w25,w26
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- add w21,w21,w6 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x7,x7,#32
-#else
- rev32 x7,x7
-#endif
- bic w25,w20,w23
- and w26,w24,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- orr w25,w25,w26
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- add w20,w20,w7 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- lsr x8,x7,#32
- ldr x9,[x1,#-40]
- bic w25,w24,w22
- and w26,w23,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- orr w25,w25,w26
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- add w24,w24,w8 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x9,x9,#32
-#else
- rev32 x9,x9
-#endif
- bic w25,w23,w21
- and w26,w22,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- orr w25,w25,w26
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- add w23,w23,w9 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- lsr x10,x9,#32
- ldr x11,[x1,#-32]
- bic w25,w22,w20
- and w26,w21,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- orr w25,w25,w26
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- add w22,w22,w10 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x11,x11,#32
-#else
- rev32 x11,x11
-#endif
- bic w25,w21,w24
- and w26,w20,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- orr w25,w25,w26
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- add w21,w21,w11 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- lsr x12,x11,#32
- ldr x13,[x1,#-24]
- bic w25,w20,w23
- and w26,w24,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- orr w25,w25,w26
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- add w20,w20,w12 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x13,x13,#32
-#else
- rev32 x13,x13
-#endif
- bic w25,w24,w22
- and w26,w23,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- orr w25,w25,w26
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- add w24,w24,w13 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- lsr x14,x13,#32
- ldr x15,[x1,#-16]
- bic w25,w23,w21
- and w26,w22,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- orr w25,w25,w26
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- add w23,w23,w14 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x15,x15,#32
-#else
- rev32 x15,x15
-#endif
- bic w25,w22,w20
- and w26,w21,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- orr w25,w25,w26
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- add w22,w22,w15 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- lsr x16,x15,#32
- ldr x17,[x1,#-8]
- bic w25,w21,w24
- and w26,w20,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- orr w25,w25,w26
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- add w21,w21,w16 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
-#ifdef __ARMEB__
- ror x17,x17,#32
-#else
- rev32 x17,x17
-#endif
- bic w25,w20,w23
- and w26,w24,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- orr w25,w25,w26
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- add w20,w20,w17 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- lsr x19,x17,#32
- eor w3,w3,w5
- bic w25,w24,w22
- and w26,w23,w22
- ror w27,w21,#27
- eor w3,w3,w11
- add w24,w24,w28 // future e+=K
- orr w25,w25,w26
- add w20,w20,w27 // e+=rot(a,5)
- eor w3,w3,w16
- ror w22,w22,#2
- add w24,w24,w19 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
- bic w25,w23,w21
- and w26,w22,w21
- ror w27,w20,#27
- eor w4,w4,w12
- add w23,w23,w28 // future e+=K
- orr w25,w25,w26
- add w24,w24,w27 // e+=rot(a,5)
- eor w4,w4,w17
- ror w21,w21,#2
- add w23,w23,w3 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
- bic w25,w22,w20
- and w26,w21,w20
- ror w27,w24,#27
- eor w5,w5,w13
- add w22,w22,w28 // future e+=K
- orr w25,w25,w26
- add w23,w23,w27 // e+=rot(a,5)
- eor w5,w5,w19
- ror w20,w20,#2
- add w22,w22,w4 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
- bic w25,w21,w24
- and w26,w20,w24
- ror w27,w23,#27
- eor w6,w6,w14
- add w21,w21,w28 // future e+=K
- orr w25,w25,w26
- add w22,w22,w27 // e+=rot(a,5)
- eor w6,w6,w3
- ror w24,w24,#2
- add w21,w21,w5 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
- bic w25,w20,w23
- and w26,w24,w23
- ror w27,w22,#27
- eor w7,w7,w15
- add w20,w20,w28 // future e+=K
- orr w25,w25,w26
- add w21,w21,w27 // e+=rot(a,5)
- eor w7,w7,w4
- ror w23,w23,#2
- add w20,w20,w6 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- movz w28,#0xeba1
- movk w28,#0x6ed9,lsl#16
- eor w8,w8,w10
- bic w25,w24,w22
- and w26,w23,w22
- ror w27,w21,#27
- eor w8,w8,w16
- add w24,w24,w28 // future e+=K
- orr w25,w25,w26
- add w20,w20,w27 // e+=rot(a,5)
- eor w8,w8,w5
- ror w22,w22,#2
- add w24,w24,w7 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w9,w9,w17
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w9,w9,w6
- add w23,w23,w8 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w10,w10,w19
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w10,w10,w7
- add w22,w22,w9 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w11,w11,w3
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w11,w11,w8
- add w21,w21,w10 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- eor w12,w12,w14
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w12,w12,w4
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w12,w12,w9
- add w20,w20,w11 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- eor w13,w13,w15
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w13,w13,w5
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w13,w13,w10
- add w24,w24,w12 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- eor w14,w14,w16
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w14,w14,w6
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w14,w14,w11
- add w23,w23,w13 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- eor w15,w15,w17
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w15,w15,w7
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w15,w15,w12
- add w22,w22,w14 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- eor w16,w16,w19
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w16,w16,w8
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w16,w16,w13
- add w21,w21,w15 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w17,w17,w9
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w17,w17,w14
- add w20,w20,w16 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w19,w19,w10
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w19,w19,w15
- add w24,w24,w17 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- eor w3,w3,w5
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w3,w3,w11
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w3,w3,w16
- add w23,w23,w19 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w4,w4,w12
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w4,w4,w17
- add w22,w22,w3 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w5,w5,w13
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w5,w5,w19
- add w21,w21,w4 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w6,w6,w14
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w6,w6,w3
- add w20,w20,w5 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w7,w7,w15
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w7,w7,w4
- add w24,w24,w6 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- eor w8,w8,w10
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w8,w8,w16
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w8,w8,w5
- add w23,w23,w7 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w9,w9,w17
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w9,w9,w6
- add w22,w22,w8 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w10,w10,w19
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w10,w10,w7
- add w21,w21,w9 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w11,w11,w3
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w11,w11,w8
- add w20,w20,w10 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- movz w28,#0xbcdc
- movk w28,#0x8f1b,lsl#16
- eor w12,w12,w14
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w12,w12,w4
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w12,w12,w9
- add w24,w24,w11 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- orr w25,w21,w22
- and w26,w21,w22
- eor w13,w13,w15
- ror w27,w20,#27
- and w25,w25,w23
- add w23,w23,w28 // future e+=K
- eor w13,w13,w5
- add w24,w24,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w21,w21,#2
- eor w13,w13,w10
- add w23,w23,w12 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- orr w25,w20,w21
- and w26,w20,w21
- eor w14,w14,w16
- ror w27,w24,#27
- and w25,w25,w22
- add w22,w22,w28 // future e+=K
- eor w14,w14,w6
- add w23,w23,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w20,w20,#2
- eor w14,w14,w11
- add w22,w22,w13 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- orr w25,w24,w20
- and w26,w24,w20
- eor w15,w15,w17
- ror w27,w23,#27
- and w25,w25,w21
- add w21,w21,w28 // future e+=K
- eor w15,w15,w7
- add w22,w22,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w24,w24,#2
- eor w15,w15,w12
- add w21,w21,w14 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- orr w25,w23,w24
- and w26,w23,w24
- eor w16,w16,w19
- ror w27,w22,#27
- and w25,w25,w20
- add w20,w20,w28 // future e+=K
- eor w16,w16,w8
- add w21,w21,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w23,w23,#2
- eor w16,w16,w13
- add w20,w20,w15 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- orr w25,w22,w23
- and w26,w22,w23
- eor w17,w17,w3
- ror w27,w21,#27
- and w25,w25,w24
- add w24,w24,w28 // future e+=K
- eor w17,w17,w9
- add w20,w20,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w22,w22,#2
- eor w17,w17,w14
- add w24,w24,w16 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- orr w25,w21,w22
- and w26,w21,w22
- eor w19,w19,w4
- ror w27,w20,#27
- and w25,w25,w23
- add w23,w23,w28 // future e+=K
- eor w19,w19,w10
- add w24,w24,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w21,w21,#2
- eor w19,w19,w15
- add w23,w23,w17 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- orr w25,w20,w21
- and w26,w20,w21
- eor w3,w3,w5
- ror w27,w24,#27
- and w25,w25,w22
- add w22,w22,w28 // future e+=K
- eor w3,w3,w11
- add w23,w23,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w20,w20,#2
- eor w3,w3,w16
- add w22,w22,w19 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- orr w25,w24,w20
- and w26,w24,w20
- eor w4,w4,w6
- ror w27,w23,#27
- and w25,w25,w21
- add w21,w21,w28 // future e+=K
- eor w4,w4,w12
- add w22,w22,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w24,w24,#2
- eor w4,w4,w17
- add w21,w21,w3 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- orr w25,w23,w24
- and w26,w23,w24
- eor w5,w5,w7
- ror w27,w22,#27
- and w25,w25,w20
- add w20,w20,w28 // future e+=K
- eor w5,w5,w13
- add w21,w21,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w23,w23,#2
- eor w5,w5,w19
- add w20,w20,w4 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- orr w25,w22,w23
- and w26,w22,w23
- eor w6,w6,w8
- ror w27,w21,#27
- and w25,w25,w24
- add w24,w24,w28 // future e+=K
- eor w6,w6,w14
- add w20,w20,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w22,w22,#2
- eor w6,w6,w3
- add w24,w24,w5 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- orr w25,w21,w22
- and w26,w21,w22
- eor w7,w7,w9
- ror w27,w20,#27
- and w25,w25,w23
- add w23,w23,w28 // future e+=K
- eor w7,w7,w15
- add w24,w24,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w21,w21,#2
- eor w7,w7,w4
- add w23,w23,w6 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- orr w25,w20,w21
- and w26,w20,w21
- eor w8,w8,w10
- ror w27,w24,#27
- and w25,w25,w22
- add w22,w22,w28 // future e+=K
- eor w8,w8,w16
- add w23,w23,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w20,w20,#2
- eor w8,w8,w5
- add w22,w22,w7 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- orr w25,w24,w20
- and w26,w24,w20
- eor w9,w9,w11
- ror w27,w23,#27
- and w25,w25,w21
- add w21,w21,w28 // future e+=K
- eor w9,w9,w17
- add w22,w22,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w24,w24,#2
- eor w9,w9,w6
- add w21,w21,w8 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- orr w25,w23,w24
- and w26,w23,w24
- eor w10,w10,w12
- ror w27,w22,#27
- and w25,w25,w20
- add w20,w20,w28 // future e+=K
- eor w10,w10,w19
- add w21,w21,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w23,w23,#2
- eor w10,w10,w7
- add w20,w20,w9 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- orr w25,w22,w23
- and w26,w22,w23
- eor w11,w11,w13
- ror w27,w21,#27
- and w25,w25,w24
- add w24,w24,w28 // future e+=K
- eor w11,w11,w3
- add w20,w20,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w22,w22,#2
- eor w11,w11,w8
- add w24,w24,w10 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- orr w25,w21,w22
- and w26,w21,w22
- eor w12,w12,w14
- ror w27,w20,#27
- and w25,w25,w23
- add w23,w23,w28 // future e+=K
- eor w12,w12,w4
- add w24,w24,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w21,w21,#2
- eor w12,w12,w9
- add w23,w23,w11 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- orr w25,w20,w21
- and w26,w20,w21
- eor w13,w13,w15
- ror w27,w24,#27
- and w25,w25,w22
- add w22,w22,w28 // future e+=K
- eor w13,w13,w5
- add w23,w23,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w20,w20,#2
- eor w13,w13,w10
- add w22,w22,w12 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- orr w25,w24,w20
- and w26,w24,w20
- eor w14,w14,w16
- ror w27,w23,#27
- and w25,w25,w21
- add w21,w21,w28 // future e+=K
- eor w14,w14,w6
- add w22,w22,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w24,w24,#2
- eor w14,w14,w11
- add w21,w21,w13 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- orr w25,w23,w24
- and w26,w23,w24
- eor w15,w15,w17
- ror w27,w22,#27
- and w25,w25,w20
- add w20,w20,w28 // future e+=K
- eor w15,w15,w7
- add w21,w21,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w23,w23,#2
- eor w15,w15,w12
- add w20,w20,w14 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- movz w28,#0xc1d6
- movk w28,#0xca62,lsl#16
- orr w25,w22,w23
- and w26,w22,w23
- eor w16,w16,w19
- ror w27,w21,#27
- and w25,w25,w24
- add w24,w24,w28 // future e+=K
- eor w16,w16,w8
- add w20,w20,w27 // e+=rot(a,5)
- orr w25,w25,w26
- ror w22,w22,#2
- eor w16,w16,w13
- add w24,w24,w15 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w17,w17,w9
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w17,w17,w14
- add w23,w23,w16 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w19,w19,w10
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w19,w19,w15
- add w22,w22,w17 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- eor w3,w3,w5
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w3,w3,w11
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w3,w3,w16
- add w21,w21,w19 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w3,w3,#31
- eor w4,w4,w6
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w4,w4,w12
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w4,w4,w17
- add w20,w20,w3 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w4,w4,#31
- eor w5,w5,w7
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w5,w5,w13
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w5,w5,w19
- add w24,w24,w4 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w5,w5,#31
- eor w6,w6,w8
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w6,w6,w14
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w6,w6,w3
- add w23,w23,w5 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w6,w6,#31
- eor w7,w7,w9
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w7,w7,w15
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w7,w7,w4
- add w22,w22,w6 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w7,w7,#31
- eor w8,w8,w10
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w8,w8,w16
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w8,w8,w5
- add w21,w21,w7 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w8,w8,#31
- eor w9,w9,w11
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w9,w9,w17
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w9,w9,w6
- add w20,w20,w8 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w9,w9,#31
- eor w10,w10,w12
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w10,w10,w19
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w10,w10,w7
- add w24,w24,w9 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w10,w10,#31
- eor w11,w11,w13
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w11,w11,w3
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w11,w11,w8
- add w23,w23,w10 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w11,w11,#31
- eor w12,w12,w14
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w12,w12,w4
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w12,w12,w9
- add w22,w22,w11 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w12,w12,#31
- eor w13,w13,w15
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w13,w13,w5
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w13,w13,w10
- add w21,w21,w12 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w13,w13,#31
- eor w14,w14,w16
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w14,w14,w6
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- eor w14,w14,w11
- add w20,w20,w13 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ror w14,w14,#31
- eor w15,w15,w17
- eor w25,w24,w22
- ror w27,w21,#27
- add w24,w24,w28 // future e+=K
- eor w15,w15,w7
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- eor w15,w15,w12
- add w24,w24,w14 // future e+=X[i]
- add w20,w20,w25 // e+=F(b,c,d)
- ror w15,w15,#31
- eor w16,w16,w19
- eor w25,w23,w21
- ror w27,w20,#27
- add w23,w23,w28 // future e+=K
- eor w16,w16,w8
- eor w25,w25,w22
- add w24,w24,w27 // e+=rot(a,5)
- ror w21,w21,#2
- eor w16,w16,w13
- add w23,w23,w15 // future e+=X[i]
- add w24,w24,w25 // e+=F(b,c,d)
- ror w16,w16,#31
- eor w17,w17,w3
- eor w25,w22,w20
- ror w27,w24,#27
- add w22,w22,w28 // future e+=K
- eor w17,w17,w9
- eor w25,w25,w21
- add w23,w23,w27 // e+=rot(a,5)
- ror w20,w20,#2
- eor w17,w17,w14
- add w22,w22,w16 // future e+=X[i]
- add w23,w23,w25 // e+=F(b,c,d)
- ror w17,w17,#31
- eor w19,w19,w4
- eor w25,w21,w24
- ror w27,w23,#27
- add w21,w21,w28 // future e+=K
- eor w19,w19,w10
- eor w25,w25,w20
- add w22,w22,w27 // e+=rot(a,5)
- ror w24,w24,#2
- eor w19,w19,w15
- add w21,w21,w17 // future e+=X[i]
- add w22,w22,w25 // e+=F(b,c,d)
- ror w19,w19,#31
- ldp w4,w5,[x0]
- eor w25,w20,w23
- ror w27,w22,#27
- add w20,w20,w28 // future e+=K
- eor w25,w25,w24
- add w21,w21,w27 // e+=rot(a,5)
- ror w23,w23,#2
- add w20,w20,w19 // future e+=X[i]
- add w21,w21,w25 // e+=F(b,c,d)
- ldp w6,w7,[x0,#8]
- eor w25,w24,w22
- ror w27,w21,#27
- eor w25,w25,w23
- add w20,w20,w27 // e+=rot(a,5)
- ror w22,w22,#2
- ldr w8,[x0,#16]
- add w20,w20,w25 // e+=F(b,c,d)
- add w21,w21,w5
- add w22,w22,w6
- add w20,w20,w4
- add w23,w23,w7
- add w24,w24,w8
- stp w20,w21,[x0]
- stp w22,w23,[x0,#8]
- str w24,[x0,#16]
- cbnz x2,.Loop
-
- ldp x19,x20,[sp,#16]
- ldp x21,x22,[sp,#32]
- ldp x23,x24,[sp,#48]
- ldp x25,x26,[sp,#64]
- ldp x27,x28,[sp,#80]
- ldr x29,[sp],#96
- ret
-.size sha1_block_data_order,.-sha1_block_data_order
-.type sha1_block_armv8,%function
-.align 6
-sha1_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- adr x4,.Lconst
- eor v1.16b,v1.16b,v1.16b
- ld1 {v0.4s},[x0],#16
- ld1 {v1.s}[0],[x0]
- sub x0,x0,#16
- ld1 {v16.4s-v19.4s},[x4]
-
-.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
- sub x2,x2,#1
- rev32 v4.16b,v4.16b
- rev32 v5.16b,v5.16b
-
- add v20.4s,v16.4s,v4.4s
- rev32 v6.16b,v6.16b
- orr v22.16b,v0.16b,v0.16b // offload
-
- add v21.4s,v16.4s,v5.4s
- rev32 v7.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b
- .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0
- add v20.4s,v16.4s,v6.4s
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1
- .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
- add v21.4s,v16.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2
- .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
- add v20.4s,v16.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3
- .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
- add v21.4s,v17.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4
- .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
- add v20.4s,v17.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
- add v21.4s,v17.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
- add v20.4s,v17.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
- add v21.4s,v17.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
- add v20.4s,v18.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
- add v21.4s,v18.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
- add v20.4s,v18.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11
- .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
- add v21.4s,v18.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
- add v20.4s,v18.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13
- .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
- add v21.4s,v19.4s,v7.4s
- .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
- .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14
- .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
- add v20.4s,v19.4s,v4.4s
- .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
- .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 15
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
- add v21.4s,v19.4s,v5.4s
- .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
- .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
- add v20.4s,v19.4s,v6.4s
- .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
- add v21.4s,v19.4s,v7.4s
-
- .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18
- .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
-
- .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19
- .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
-
- add v1.4s,v1.4s,v2.4s
- add v0.4s,v0.4s,v22.4s
-
- cbnz x2,.Loop_hw
-
- st1 {v0.4s},[x0],#16
- st1 {v1.s}[0],[x0]
-
- ldr x29,[sp],#16
- ret
-.size sha1_block_armv8,.-sha1_block_armv8
-.align 6
-.Lconst:
-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
-.LOPENSSL_armcap_P:
-.quad OPENSSL_armcap_P-.
-.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
diff --git a/app/openssl/crypto/sha/asm/sha1-armv8.pl b/app/openssl/crypto/sha/asm/sha1-armv8.pl
deleted file mode 100644
index c1f552b6..00000000
--- a/app/openssl/crypto/sha/asm/sha1-armv8.pl
+++ /dev/null
@@ -1,333 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# SHA1 for ARMv8.
-#
-# Performance in cycles per processed byte and improvement coefficient
-# over code generated with "default" compiler:
-#
-# hardware-assisted software(*)
-# Apple A7 2.31 4.13 (+14%)
-# Cortex-A5x n/a n/a
-#
-# (*) Software results are presented mostly for reference purposes.
-
-$flavour = shift;
-open STDOUT,">".shift;
-
-($ctx,$inp,$num)=("x0","x1","x2");
-@Xw=map("w$_",(3..17,19));
-@Xx=map("x$_",(3..17,19));
-@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));
-($t0,$t1,$t2,$K)=map("w$_",(25..28));
-
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=($i+2)&15;
-
-$code.=<<___ if ($i<15 && !($i&1));
- lsr @Xx[$i+1],@Xx[$i],#32
-___
-$code.=<<___ if ($i<14 && !($i&1));
- ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`]
-___
-$code.=<<___ if ($i<14 && ($i&1));
-#ifdef __ARMEB__
- ror @Xx[$i+1],@Xx[$i+1],#32
-#else
- rev32 @Xx[$i+1],@Xx[$i+1]
-#endif
-___
-$code.=<<___ if ($i<14);
- bic $t0,$d,$b
- and $t1,$c,$b
- ror $t2,$a,#27
- add $d,$d,$K // future e+=K
- orr $t0,$t0,$t1
- add $e,$e,$t2 // e+=rot(a,5)
- ror $b,$b,#2
- add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
- add $e,$e,$t0 // e+=F(b,c,d)
-___
-$code.=<<___ if ($i==19);
- movz $K,#0xeba1
- movk $K,#0x6ed9,lsl#16
-___
-$code.=<<___ if ($i>=14);
- eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
- bic $t0,$d,$b
- and $t1,$c,$b
- ror $t2,$a,#27
- eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
- add $d,$d,$K // future e+=K
- orr $t0,$t0,$t1
- add $e,$e,$t2 // e+=rot(a,5)
- eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
- ror $b,$b,#2
- add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
- add $e,$e,$t0 // e+=F(b,c,d)
- ror @Xw[$j],@Xw[$j],#31
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=($i+2)&15;
-
-$code.=<<___ if ($i==59);
- movz $K,#0xc1d6
- movk $K,#0xca62,lsl#16
-___
-$code.=<<___;
- orr $t0,$b,$c
- and $t1,$b,$c
- eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
- ror $t2,$a,#27
- and $t0,$t0,$d
- add $d,$d,$K // future e+=K
- eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
- add $e,$e,$t2 // e+=rot(a,5)
- orr $t0,$t0,$t1
- ror $b,$b,#2
- eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
- add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
- add $e,$e,$t0 // e+=F(b,c,d)
- ror @Xw[$j],@Xw[$j],#31
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=($i+2)&15;
-
-$code.=<<___ if ($i==39);
- movz $K,#0xbcdc
- movk $K,#0x8f1b,lsl#16
-___
-$code.=<<___ if ($i<78);
- eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
- eor $t0,$d,$b
- ror $t2,$a,#27
- add $d,$d,$K // future e+=K
- eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
- eor $t0,$t0,$c
- add $e,$e,$t2 // e+=rot(a,5)
- ror $b,$b,#2
- eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
- add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
- add $e,$e,$t0 // e+=F(b,c,d)
- ror @Xw[$j],@Xw[$j],#31
-___
-$code.=<<___ if ($i==78);
- ldp @Xw[1],@Xw[2],[$ctx]
- eor $t0,$d,$b
- ror $t2,$a,#27
- add $d,$d,$K // future e+=K
- eor $t0,$t0,$c
- add $e,$e,$t2 // e+=rot(a,5)
- ror $b,$b,#2
- add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
- add $e,$e,$t0 // e+=F(b,c,d)
-___
-$code.=<<___ if ($i==79);
- ldp @Xw[3],@Xw[4],[$ctx,#8]
- eor $t0,$d,$b
- ror $t2,$a,#27
- eor $t0,$t0,$c
- add $e,$e,$t2 // e+=rot(a,5)
- ror $b,$b,#2
- ldr @Xw[5],[$ctx,#16]
- add $e,$e,$t0 // e+=F(b,c,d)
-___
-}
-
-$code.=<<___;
-#include "arm_arch.h"
-
-.text
-
-.globl sha1_block_data_order
-.type sha1_block_data_order,%function
-.align 6
-sha1_block_data_order:
- ldr x16,.LOPENSSL_armcap_P
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA1
- b.ne .Lv8_entry
-
- stp x29,x30,[sp,#-96]!
- add x29,sp,#0
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
-
- ldp $A,$B,[$ctx]
- ldp $C,$D,[$ctx,#8]
- ldr $E,[$ctx,#16]
-
-.Loop:
- ldr @Xx[0],[$inp],#64
- movz $K,#0x7999
- sub $num,$num,#1
- movk $K,#0x5a82,lsl#16
-#ifdef __ARMEB__
-	ror	@Xx[0],@Xx[0],#32
-#else
- rev32 @Xx[0],@Xx[0]
-#endif
- add $E,$E,$K // warm it up
- add $E,$E,@Xw[0]
-___
-for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- add $B,$B,@Xw[2]
- add $C,$C,@Xw[3]
- add $A,$A,@Xw[1]
- add $D,$D,@Xw[4]
- add $E,$E,@Xw[5]
- stp $A,$B,[$ctx]
- stp $C,$D,[$ctx,#8]
- str $E,[$ctx,#16]
- cbnz $num,.Loop
-
- ldp x19,x20,[sp,#16]
- ldp x21,x22,[sp,#32]
- ldp x23,x24,[sp,#48]
- ldp x25,x26,[sp,#64]
- ldp x27,x28,[sp,#80]
- ldr x29,[sp],#96
- ret
-.size sha1_block_data_order,.-sha1_block_data_order
-___
-{{{
-my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
-my @MSG=map("v$_.16b",(4..7));
-my @Kxx=map("v$_.4s",(16..19));
-my ($W0,$W1)=("v20.4s","v21.4s");
-my $ABCD_SAVE="v22.16b";
-
-$code.=<<___;
-.type sha1_block_armv8,%function
-.align 6
-sha1_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- adr x4,.Lconst
- eor $E,$E,$E
- ld1.32 {$ABCD},[$ctx],#16
- ld1.32 {$E}[0],[$ctx]
- sub $ctx,$ctx,#16
- ld1.32 {@Kxx[0]-@Kxx[3]},[x4]
-
-.Loop_hw:
- ld1 {@MSG[0]-@MSG[3]},[$inp],#64
- sub $num,$num,#1
- rev32 @MSG[0],@MSG[0]
- rev32 @MSG[1],@MSG[1]
-
- add.i32 $W0,@Kxx[0],@MSG[0]
- rev32 @MSG[2],@MSG[2]
- orr $ABCD_SAVE,$ABCD,$ABCD // offload
-
- add.i32 $W1,@Kxx[0],@MSG[1]
- rev32 @MSG[3],@MSG[3]
- sha1h $E1,$ABCD
- sha1c $ABCD,$E,$W0 // 0
- add.i32 $W0,@Kxx[$j],@MSG[2]
- sha1su0 @MSG[0],@MSG[1],@MSG[2]
-___
-for ($j=0,$i=1;$i<20-3;$i++) {
-my $f=("c","p","m","p")[$i/5];
-$code.=<<___;
- sha1h $E0,$ABCD // $i
- sha1$f $ABCD,$E1,$W1
- add.i32 $W1,@Kxx[$j],@MSG[3]
- sha1su1 @MSG[0],@MSG[3]
-___
-$code.=<<___ if ($i<20-4);
- sha1su0 @MSG[1],@MSG[2],@MSG[3]
-___
- ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
- push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
-}
-$code.=<<___;
- sha1h $E0,$ABCD // $i
- sha1p $ABCD,$E1,$W1
- add.i32 $W1,@Kxx[$j],@MSG[3]
-
- sha1h $E1,$ABCD // 18
- sha1p $ABCD,$E0,$W0
-
- sha1h $E0,$ABCD // 19
- sha1p $ABCD,$E1,$W1
-
- add.i32 $E,$E,$E0
- add.i32 $ABCD,$ABCD,$ABCD_SAVE
-
- cbnz $num,.Loop_hw
-
- st1.32 {$ABCD},[$ctx],#16
- st1.32 {$E}[0],[$ctx]
-
- ldr x29,[sp],#16
- ret
-.size sha1_block_armv8,.-sha1_block_armv8
-.align 6
-.Lconst:
-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
-.LOPENSSL_armcap_P:
-.quad OPENSSL_armcap_P-.
-.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
-___
-}}}
-
-{ my %opcode = (
- "sha1c" => 0x5e000000, "sha1p" => 0x5e001000,
- "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000,
- "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 );
-
- sub unsha1 {
- my ($mnemonic,$arg)=@_;
-
- $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
- &&
- sprintf ".inst\t0x%08x\t//%s %s",
- $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
- $mnemonic,$arg;
- }
-}
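-
-# Worked example of the encoding above (an illustration, not from the
-# original module): "sha1h v3.16b,v0.16b" has Rd=3 and Rn=0, so the
-# emitted word is 0x5e280800|3|(0<<5) = 0x5e280803, which is exactly the
-# .inst value seen in the pre-generated sha1-armv8.S earlier in this diff.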
-
-foreach(split("\n",$code)) {
-
- s/\`([^\`]*)\`/eval($1)/geo;
-
- s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo;
-
- s/\.\w?32\b//o and s/\.16b/\.4s/go;
- m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
-
- print $_,"\n";
-}
-
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-ia64.pl b/app/openssl/crypto/sha/asm/sha1-ia64.pl
deleted file mode 100644
index 02d35d16..00000000
--- a/app/openssl/crypto/sha/asm/sha1-ia64.pl
+++ /dev/null
@@ -1,305 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# The eternal question is: what's wrong with compiler-generated code? The
-# trick is that it's possible to reduce the number of shifts required
-# to perform rotations by maintaining a copy of the 32-bit value in the
-# upper bits of a 64-bit register. Just follow the mux2 and shrp
-# instructions... Performance under a big-endian OS such as HP-UX is
-# 179MBps*1GHz, which is >50% better than HP C and >2x better than gcc.
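-#
-# As a rough C sketch of that trick (an illustration, not from the
-# original module): keep x replicated in both 32-bit halves of a 64-bit
-# register (mux2 with control 0x44), after which any rotation becomes a
-# single funnel shift (shrp):
-#
-#	uint64_t d = ((uint64_t)x<<32)|x;	/* mux2	d=x,0x44 */
-#	uint32_t r = (uint32_t)(d>>k);		/* shrp	r=d,d,k  */
-#
-# so r == ROTATE_RIGHT(x,k); e.g. b=ROTATE(b,30) is shrp by 2 below.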
-
-$code=<<___;
-.ident \"sha1-ia64.s, version 1.3\"
-.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
-.explicit
-
-___
-
-
-if ($^O eq "hpux") {
- $ADDP="addp4";
- for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
-} else { $ADDP="add"; }
-
-#$human=1;
-if ($human) { # useful for visual code auditing...
- ($A,$B,$C,$D,$E) = ("A","B","C","D","E");
- ($h0,$h1,$h2,$h3,$h4) = ("h0","h1","h2","h3","h4");
- ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
- ( "K_00_19","K_20_39","K_40_59","K_60_79" );
- @X= ( "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
- "X8", "X9","X10","X11","X12","X13","X14","X15" );
-}
-else {
- ($A,$B,$C,$D,$E) = ("loc0","loc1","loc2","loc3","loc4");
- ($h0,$h1,$h2,$h3,$h4) = ("loc5","loc6","loc7","loc8","loc9");
- ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
- ( "r14", "r15", "loc10", "loc11" );
- @X= ( "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
- "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" );
-}
-
-sub BODY_00_15 {
-local *code=shift;
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-my $Xn=@X[$j%16];
-
-$code.=<<___ if ($i==0);
-{ .mmi; ld1 $X[$i]=[inp],2 // MSB
- ld1 tmp2=[tmp3],2 };;
-{ .mmi; ld1 tmp0=[inp],2
- ld1 tmp4=[tmp3],2 // LSB
- dep $X[$i]=$X[$i],tmp2,8,8 };;
-___
-if ($i<15) {
- $code.=<<___;
-{ .mmi; ld1 $Xn=[inp],2 // forward Xload
- nop.m 0x0
- dep tmp1=tmp0,tmp4,8,8 };;
-{ .mmi; ld1 tmp2=[tmp3],2 // forward Xload
- and tmp4=$c,$b
- dep $X[$i]=$X[$i],tmp1,16,16} //;;
-{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19
- andcm tmp1=$d,$b
- dep.z tmp5=$a,5,27 };; // a<<5
-{ .mmi; add $e=$e,$X[$i] // e+=Xload
- or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
- extr.u tmp1=$a,27,5 };; // a>>27
-{ .mmi; ld1 tmp0=[inp],2 // forward Xload
- add $e=$e,tmp4 // e+=F_00_19(b,c,d)
- shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
-{ .mmi; ld1 tmp4=[tmp3],2 // forward Xload
- or tmp5=tmp1,tmp5 // ROTATE(a,5)
- mux2 tmp6=$a,0x44 };; // see b in next iteration
-{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)
- dep $Xn=$Xn,tmp2,8,8 // forward Xload
- mux2 $X[$i]=$X[$i],0x44 } //;;
-
-___
- }
-else {
- $code.=<<___;
-{ .mii; and tmp3=$c,$b
- dep tmp1=tmp0,tmp4,8,8;;
- dep $X[$i]=$X[$i],tmp1,16,16} //;;
-{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19
- andcm tmp1=$d,$b
- dep.z tmp5=$a,5,27 };; // a<<5
-{ .mmi; add $e=$e,$X[$i] // e+=Xupdate
- or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
- extr.u tmp1=$a,27,5 } // a>>27
-{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
- xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
- nop.i 0 };;
-{ .mmi; add $e=$e,tmp4 // e+=F_00_19(b,c,d)
- xor $Xn=$Xn,tmp3 // forward Xupdate
- shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
-{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
- mux2 tmp6=$a,0x44 };; // see b in next iteration
-{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
- shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
- mux2 $X[$i]=$X[$i],0x44 };;
-
-___
- }
-}
-
-sub BODY_16_19 {
-local *code=shift;
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-my $Xn=@X[$j%16];
-
-$code.=<<___;
-{ .mib; add $e=$e,$K_00_19 // e+=K_00_19
- dep.z tmp5=$a,5,27 } // a<<5
-{ .mib; andcm tmp1=$d,$b
- and tmp0=$c,$b };;
-{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate
- or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d)
- extr.u tmp1=$a,27,5 } // a>>27
-{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
- xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
- nop.i 0 };;
-{ .mmi;	add	$e=$e,tmp0	// e+=F_00_19(b,c,d)
- xor $Xn=$Xn,tmp3 // forward Xupdate
- shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
-{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
- mux2 tmp6=$a,0x44 };; // see b in next iteration
-{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
- shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
- nop.i 0 };;
-
-___
-}
-
-sub BODY_20_39 {
-local *code=shift;
-my ($i,$a,$b,$c,$d,$e,$Konst)=@_;
- $Konst = $K_20_39 if (!defined($Konst));
-my $j=$i+1;
-my $Xn=@X[$j%16];
-
-if ($i<79) {
-$code.=<<___;
-{ .mib; add $e=$e,$Konst // e+=K_XX_XX
- dep.z tmp5=$a,5,27 } // a<<5
-{ .mib; xor tmp0=$c,$b
- xor $Xn=$Xn,$X[($j+2)%16] };; // forward Xupdate
-{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate
- extr.u tmp1=$a,27,5 } // a>>27
-{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d
- xor $Xn=$Xn,$X[($j+8)%16] };; // forward Xupdate
-{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d)
- xor $Xn=$Xn,$X[($j+13)%16] // forward Xupdate
- shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
-{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
- mux2 tmp6=$a,0x44 };; // see b in next iteration
-{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
- shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
- nop.i 0 };;
-
-___
-}
-else {
-$code.=<<___;
-{ .mib; add $e=$e,$Konst // e+=K_60_79
- dep.z tmp5=$a,5,27 } // a<<5
-{ .mib; xor tmp0=$c,$b
- add $h1=$h1,$a };; // wrap up
-{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate
- extr.u tmp1=$a,27,5 } // a>>27
-{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d
- add $h3=$h3,$c };; // wrap up
-{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d)
- or tmp1=tmp1,tmp5 // ROTATE(a,5)
- shrp $b=tmp6,tmp6,2 };; // b=ROTATE(b,30) ;;?
-{ .mmi; add $e=$e,tmp1 // e+=ROTATE(a,5)
- add tmp3=1,inp // used in unaligned codepath
- add $h4=$h4,$d };; // wrap up
-
-___
-}
-}
-
-sub BODY_40_59 {
-local *code=shift;
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-my $Xn=@X[$j%16];
-
-$code.=<<___;
-{ .mib; add $e=$e,$K_40_59 // e+=K_40_59
- dep.z tmp5=$a,5,27 } // a<<5
-{ .mib; and tmp1=$c,$d
- xor tmp0=$c,$d };;
-{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate
- add tmp5=tmp5,tmp1 // a<<5+(c&d)
- extr.u tmp1=$a,27,5 } // a>>27
-{ .mmi; and tmp0=tmp0,$b
- xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
- xor tmp3=$X[($j+8)%16],$X[($j+13)%16] };; // forward Xupdate
-{ .mmi; add $e=$e,tmp0 // e+=b&(c^d)
- add tmp5=tmp5,tmp1 // ROTATE(a,5)+(c&d)
- shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
-{ .mmi; xor $Xn=$Xn,tmp3
- mux2 tmp6=$a,0x44 };; // see b in next iteration
-{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)+(c&d)
- shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
- nop.i 0x0 };;
-
-___
-}
-sub BODY_60_79 { &BODY_20_39(@_,$K_60_79); }
-
-$code.=<<___;
-.text
-
-tmp0=r8;
-tmp1=r9;
-tmp2=r10;
-tmp3=r11;
-ctx=r32; // in0
-inp=r33; // in1
-
-// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num);
-.global sha1_block_data_order#
-.proc sha1_block_data_order#
-.align 32
-sha1_block_data_order:
- .prologue
-{ .mmi; alloc tmp1=ar.pfs,3,14,0,0
- $ADDP tmp0=4,ctx
- .save ar.lc,r3
- mov r3=ar.lc }
-{ .mmi; $ADDP ctx=0,ctx
- $ADDP inp=0,inp
- mov r2=pr };;
-tmp4=in2;
-tmp5=loc12;
-tmp6=loc13;
- .body
-{ .mlx; ld4 $h0=[ctx],8
- movl $K_00_19=0x5a827999 }
-{ .mlx; ld4 $h1=[tmp0],8
- movl $K_20_39=0x6ed9eba1 };;
-{ .mlx; ld4 $h2=[ctx],8
- movl $K_40_59=0x8f1bbcdc }
-{ .mlx; ld4 $h3=[tmp0]
- movl $K_60_79=0xca62c1d6 };;
-{ .mmi; ld4 $h4=[ctx],-16
- add in2=-1,in2 // adjust num for ar.lc
- mov ar.ec=1 };;
-{ .mmi; nop.m 0
- add tmp3=1,inp
- mov ar.lc=in2 };; // brp.loop.imp: too far
-
-.Ldtop:
-{ .mmi; mov $A=$h0
- mov $B=$h1
- mux2 tmp6=$h1,0x44 }
-{ .mmi; mov $C=$h2
- mov $D=$h3
- mov $E=$h4 };;
-
-___
-
-{ my $i;
- my @V=($A,$B,$C,$D,$E);
-
- for($i=0;$i<16;$i++) { &BODY_00_15(\$code,$i,@V); unshift(@V,pop(@V)); }
- for(;$i<20;$i++) { &BODY_16_19(\$code,$i,@V); unshift(@V,pop(@V)); }
- for(;$i<40;$i++) { &BODY_20_39(\$code,$i,@V); unshift(@V,pop(@V)); }
- for(;$i<60;$i++) { &BODY_40_59(\$code,$i,@V); unshift(@V,pop(@V)); }
- for(;$i<80;$i++) { &BODY_60_79(\$code,$i,@V); unshift(@V,pop(@V)); }
-
- (($V[0] eq $A) and ($V[4] eq $E)) or die; # double-check
-}
-
-$code.=<<___;
-{ .mmb; add $h0=$h0,$A
- add $h2=$h2,$C
- br.ctop.dptk.many .Ldtop };;
-.Ldend:
-{ .mmi; add tmp0=4,ctx
- mov ar.lc=r3 };;
-{ .mmi; st4 [ctx]=$h0,8
- st4 [tmp0]=$h1,8 };;
-{ .mmi; st4 [ctx]=$h2,8
- st4 [tmp0]=$h3 };;
-{ .mib; st4 [ctx]=$h4,-16
- mov pr=r2,0x1ffff
- br.ret.sptk.many b0 };;
-.endp sha1_block_data_order#
-stringz "SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
-___
-
-$output=shift and open STDOUT,">$output";
-print $code;
diff --git a/app/openssl/crypto/sha/asm/sha1-mips.S b/app/openssl/crypto/sha/asm/sha1-mips.S
deleted file mode 100644
index 865da255..00000000
--- a/app/openssl/crypto/sha/asm/sha1-mips.S
+++ /dev/null
@@ -1,1664 +0,0 @@
-#ifdef OPENSSL_FIPSCANISTER
-# include <openssl/fipssyms.h>
-#endif
-
-.text
-
-.set noat
-.set noreorder
-.align 5
-.globl sha1_block_data_order
-.ent sha1_block_data_order
-sha1_block_data_order:
- .frame $29,16*4,$31
- .mask 3237937152,-4
- .set noreorder
- sub $29,16*4
- sw $31,(16-1)*4($29)
- sw $30,(16-2)*4($29)
- sw $23,(16-3)*4($29)
- sw $22,(16-4)*4($29)
- sw $21,(16-5)*4($29)
- sw $20,(16-6)*4($29)
- sw $19,(16-7)*4($29)
- sw $18,(16-8)*4($29)
- sw $17,(16-9)*4($29)
- sw $16,(16-10)*4($29)
- sll $6,6
- add $6,$5
- sw $6,0($29)
- lw $1,0($4)
- lw $2,4($4)
- lw $3,8($4)
- lw $7,12($4)
- b .Loop
- lw $24,16($4)
-.align 4
-.Loop:
- .set reorder
- lwl $8,3($5)
- lui $31,0x5a82
- lwr $8,0($5)
- ori $31,0x7999 # K_00_19
- srl $25,$8,24 # byte swap(0)
- srl $6,$8,8
- andi $30,$8,0xFF00
- sll $8,$8,24
- andi $6,0xFF00
- sll $30,$30,8
- or $8,$25
- or $6,$30
- or $8,$6
- lwl $9,1*4+3($5)
- sll $25,$1,5 # 0
- addu $24,$31
- lwr $9,1*4+0($5)
- srl $6,$1,27
- addu $24,$25
- xor $25,$3,$7
- addu $24,$6
- sll $30,$2,30
- and $25,$2
- srl $2,$2,2
- xor $25,$7
- addu $24,$8
- or $2,$30
- addu $24,$25
- srl $25,$9,24 # byte swap(1)
- srl $6,$9,8
- andi $30,$9,0xFF00
- sll $9,$9,24
- andi $6,0xFF00
- sll $30,$30,8
- or $9,$25
- or $6,$30
- or $9,$6
- lwl $10,2*4+3($5)
- sll $25,$24,5 # 1
- addu $7,$31
- lwr $10,2*4+0($5)
- srl $6,$24,27
- addu $7,$25
- xor $25,$2,$3
- addu $7,$6
- sll $30,$1,30
- and $25,$1
- srl $1,$1,2
- xor $25,$3
- addu $7,$9
- or $1,$30
- addu $7,$25
- srl $25,$10,24 # byte swap(2)
- srl $6,$10,8
- andi $30,$10,0xFF00
- sll $10,$10,24
- andi $6,0xFF00
- sll $30,$30,8
- or $10,$25
- or $6,$30
- or $10,$6
- lwl $11,3*4+3($5)
- sll $25,$7,5 # 2
- addu $3,$31
- lwr $11,3*4+0($5)
- srl $6,$7,27
- addu $3,$25
- xor $25,$1,$2
- addu $3,$6
- sll $30,$24,30
- and $25,$24
- srl $24,$24,2
- xor $25,$2
- addu $3,$10
- or $24,$30
- addu $3,$25
- srl $25,$11,24 # byte swap(3)
- srl $6,$11,8
- andi $30,$11,0xFF00
- sll $11,$11,24
- andi $6,0xFF00
- sll $30,$30,8
- or $11,$25
- or $6,$30
- or $11,$6
- lwl $12,4*4+3($5)
- sll $25,$3,5 # 3
- addu $2,$31
- lwr $12,4*4+0($5)
- srl $6,$3,27
- addu $2,$25
- xor $25,$24,$1
- addu $2,$6
- sll $30,$7,30
- and $25,$7
- srl $7,$7,2
- xor $25,$1
- addu $2,$11
- or $7,$30
- addu $2,$25
- srl $25,$12,24 # byte swap(4)
- srl $6,$12,8
- andi $30,$12,0xFF00
- sll $12,$12,24
- andi $6,0xFF00
- sll $30,$30,8
- or $12,$25
- or $6,$30
- or $12,$6
- lwl $13,5*4+3($5)
- sll $25,$2,5 # 4
- addu $1,$31
- lwr $13,5*4+0($5)
- srl $6,$2,27
- addu $1,$25
- xor $25,$7,$24
- addu $1,$6
- sll $30,$3,30
- and $25,$3
- srl $3,$3,2
- xor $25,$24
- addu $1,$12
- or $3,$30
- addu $1,$25
- srl $25,$13,24 # byte swap(5)
- srl $6,$13,8
- andi $30,$13,0xFF00
- sll $13,$13,24
- andi $6,0xFF00
- sll $30,$30,8
- or $13,$25
- or $6,$30
- or $13,$6
- lwl $14,6*4+3($5)
- sll $25,$1,5 # 5
- addu $24,$31
- lwr $14,6*4+0($5)
- srl $6,$1,27
- addu $24,$25
- xor $25,$3,$7
- addu $24,$6
- sll $30,$2,30
- and $25,$2
- srl $2,$2,2
- xor $25,$7
- addu $24,$13
- or $2,$30
- addu $24,$25
- srl $25,$14,24 # byte swap(6)
- srl $6,$14,8
- andi $30,$14,0xFF00
- sll $14,$14,24
- andi $6,0xFF00
- sll $30,$30,8
- or $14,$25
- or $6,$30
- or $14,$6
- lwl $15,7*4+3($5)
- sll $25,$24,5 # 6
- addu $7,$31
- lwr $15,7*4+0($5)
- srl $6,$24,27
- addu $7,$25
- xor $25,$2,$3
- addu $7,$6
- sll $30,$1,30
- and $25,$1
- srl $1,$1,2
- xor $25,$3
- addu $7,$14
- or $1,$30
- addu $7,$25
- srl $25,$15,24 # byte swap(7)
- srl $6,$15,8
- andi $30,$15,0xFF00
- sll $15,$15,24
- andi $6,0xFF00
- sll $30,$30,8
- or $15,$25
- or $6,$30
- or $15,$6
- lwl $16,8*4+3($5)
- sll $25,$7,5 # 7
- addu $3,$31
- lwr $16,8*4+0($5)
- srl $6,$7,27
- addu $3,$25
- xor $25,$1,$2
- addu $3,$6
- sll $30,$24,30
- and $25,$24
- srl $24,$24,2
- xor $25,$2
- addu $3,$15
- or $24,$30
- addu $3,$25
- srl $25,$16,24 # byte swap(8)
- srl $6,$16,8
- andi $30,$16,0xFF00
- sll $16,$16,24
- andi $6,0xFF00
- sll $30,$30,8
- or $16,$25
- or $6,$30
- or $16,$6
- lwl $17,9*4+3($5)
- sll $25,$3,5 # 8
- addu $2,$31
- lwr $17,9*4+0($5)
- srl $6,$3,27
- addu $2,$25
- xor $25,$24,$1
- addu $2,$6
- sll $30,$7,30
- and $25,$7
- srl $7,$7,2
- xor $25,$1
- addu $2,$16
- or $7,$30
- addu $2,$25
- srl $25,$17,24 # byte swap(9)
- srl $6,$17,8
- andi $30,$17,0xFF00
- sll $17,$17,24
- andi $6,0xFF00
- sll $30,$30,8
- or $17,$25
- or $6,$30
- or $17,$6
- lwl $18,10*4+3($5)
- sll $25,$2,5 # 9
- addu $1,$31
- lwr $18,10*4+0($5)
- srl $6,$2,27
- addu $1,$25
- xor $25,$7,$24
- addu $1,$6
- sll $30,$3,30
- and $25,$3
- srl $3,$3,2
- xor $25,$24
- addu $1,$17
- or $3,$30
- addu $1,$25
- srl $25,$18,24 # byte swap(10)
- srl $6,$18,8
- andi $30,$18,0xFF00
- sll $18,$18,24
- andi $6,0xFF00
- sll $30,$30,8
- or $18,$25
- or $6,$30
- or $18,$6
- lwl $19,11*4+3($5)
- sll $25,$1,5 # 10
- addu $24,$31
- lwr $19,11*4+0($5)
- srl $6,$1,27
- addu $24,$25
- xor $25,$3,$7
- addu $24,$6
- sll $30,$2,30
- and $25,$2
- srl $2,$2,2
- xor $25,$7
- addu $24,$18
- or $2,$30
- addu $24,$25
- srl $25,$19,24 # byte swap(11)
- srl $6,$19,8
- andi $30,$19,0xFF00
- sll $19,$19,24
- andi $6,0xFF00
- sll $30,$30,8
- or $19,$25
- or $6,$30
- or $19,$6
- lwl $20,12*4+3($5)
- sll $25,$24,5 # 11
- addu $7,$31
- lwr $20,12*4+0($5)
- srl $6,$24,27
- addu $7,$25
- xor $25,$2,$3
- addu $7,$6
- sll $30,$1,30
- and $25,$1
- srl $1,$1,2
- xor $25,$3
- addu $7,$19
- or $1,$30
- addu $7,$25
- srl $25,$20,24 # byte swap(12)
- srl $6,$20,8
- andi $30,$20,0xFF00
- sll $20,$20,24
- andi $6,0xFF00
- sll $30,$30,8
- or $20,$25
- or $6,$30
- or $20,$6
- lwl $21,13*4+3($5)
- sll $25,$7,5 # 12
- addu $3,$31
- lwr $21,13*4+0($5)
- srl $6,$7,27
- addu $3,$25
- xor $25,$1,$2
- addu $3,$6
- sll $30,$24,30
- and $25,$24
- srl $24,$24,2
- xor $25,$2
- addu $3,$20
- or $24,$30
- addu $3,$25
- srl $25,$21,24 # byte swap(13)
- srl $6,$21,8
- andi $30,$21,0xFF00
- sll $21,$21,24
- andi $6,0xFF00
- sll $30,$30,8
- or $21,$25
- or $6,$30
- or $21,$6
- lwl $22,14*4+3($5)
- sll $25,$3,5 # 13
- addu $2,$31
- lwr $22,14*4+0($5)
- srl $6,$3,27
- addu $2,$25
- xor $25,$24,$1
- addu $2,$6
- sll $30,$7,30
- and $25,$7
- srl $7,$7,2
- xor $25,$1
- addu $2,$21
- or $7,$30
- addu $2,$25
- srl $25,$22,24 # byte swap(14)
- srl $6,$22,8
- andi $30,$22,0xFF00
- sll $22,$22,24
- andi $6,0xFF00
- sll $30,$30,8
- or $22,$25
- or $6,$30
- or $22,$6
- lwl $23,15*4+3($5)
- sll $25,$2,5 # 14
- addu $1,$31
- lwr $23,15*4+0($5)
- srl $6,$2,27
- addu $1,$25
- xor $25,$7,$24
- addu $1,$6
- sll $30,$3,30
- and $25,$3
- srl $3,$3,2
- xor $25,$24
- addu $1,$22
- or $3,$30
- addu $1,$25
- srl $25,$23,24 # byte swap(15)
- srl $6,$23,8
- andi $30,$23,0xFF00
- sll $23,$23,24
- andi $6,0xFF00
- sll $30,$30,8
- or $23,$25
- or $23,$6
- or $23,$30
- xor $8,$10
- sll $25,$1,5 # 15
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $8,$16
- xor $25,$3,$7
- addu $24,$6
- xor $8,$21
- sll $30,$2,30
- and $25,$2
- srl $6,$8,31
- addu $8,$8
- srl $2,$2,2
- xor $25,$7
- or $8,$6
- addu $24,$23
- or $2,$30
- addu $24,$25
- xor $9,$11
- sll $25,$24,5 # 16
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $9,$17
- xor $25,$2,$3
- addu $7,$6
- xor $9,$22
- sll $30,$1,30
- and $25,$1
- srl $6,$9,31
- addu $9,$9
- srl $1,$1,2
- xor $25,$3
- or $9,$6
- addu $7,$8
- or $1,$30
- addu $7,$25
- xor $10,$12
- sll $25,$7,5 # 17
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $10,$18
- xor $25,$1,$2
- addu $3,$6
- xor $10,$23
- sll $30,$24,30
- and $25,$24
- srl $6,$10,31
- addu $10,$10
- srl $24,$24,2
- xor $25,$2
- or $10,$6
- addu $3,$9
- or $24,$30
- addu $3,$25
- xor $11,$13
- sll $25,$3,5 # 18
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $11,$19
- xor $25,$24,$1
- addu $2,$6
- xor $11,$8
- sll $30,$7,30
- and $25,$7
- srl $6,$11,31
- addu $11,$11
- srl $7,$7,2
- xor $25,$1
- or $11,$6
- addu $2,$10
- or $7,$30
- addu $2,$25
- xor $12,$14
- sll $25,$2,5 # 19
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $12,$20
- xor $25,$7,$24
- addu $1,$6
- xor $12,$9
- sll $30,$3,30
- and $25,$3
- srl $6,$12,31
- addu $12,$12
- srl $3,$3,2
- xor $25,$24
- or $12,$6
- addu $1,$11
- or $3,$30
- addu $1,$25
- lui $31,0x6ed9
- ori $31,0xeba1 # K_20_39
- xor $13,$15
- sll $25,$1,5 # 20
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $13,$21
- xor $25,$3,$7
- addu $24,$6
- xor $13,$10
- sll $30,$2,30
- xor $25,$2
- srl $6,$13,31
- addu $13,$13
- srl $2,$2,2
- addu $24,$12
- or $13,$6
- or $2,$30
- addu $24,$25
- xor $14,$16
- sll $25,$24,5 # 21
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $14,$22
- xor $25,$2,$3
- addu $7,$6
- xor $14,$11
- sll $30,$1,30
- xor $25,$1
- srl $6,$14,31
- addu $14,$14
- srl $1,$1,2
- addu $7,$13
- or $14,$6
- or $1,$30
- addu $7,$25
- xor $15,$17
- sll $25,$7,5 # 22
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $15,$23
- xor $25,$1,$2
- addu $3,$6
- xor $15,$12
- sll $30,$24,30
- xor $25,$24
- srl $6,$15,31
- addu $15,$15
- srl $24,$24,2
- addu $3,$14
- or $15,$6
- or $24,$30
- addu $3,$25
- xor $16,$18
- sll $25,$3,5 # 23
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $16,$8
- xor $25,$24,$1
- addu $2,$6
- xor $16,$13
- sll $30,$7,30
- xor $25,$7
- srl $6,$16,31
- addu $16,$16
- srl $7,$7,2
- addu $2,$15
- or $16,$6
- or $7,$30
- addu $2,$25
- xor $17,$19
- sll $25,$2,5 # 24
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $17,$9
- xor $25,$7,$24
- addu $1,$6
- xor $17,$14
- sll $30,$3,30
- xor $25,$3
- srl $6,$17,31
- addu $17,$17
- srl $3,$3,2
- addu $1,$16
- or $17,$6
- or $3,$30
- addu $1,$25
- xor $18,$20
- sll $25,$1,5 # 25
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $18,$10
- xor $25,$3,$7
- addu $24,$6
- xor $18,$15
- sll $30,$2,30
- xor $25,$2
- srl $6,$18,31
- addu $18,$18
- srl $2,$2,2
- addu $24,$17
- or $18,$6
- or $2,$30
- addu $24,$25
- xor $19,$21
- sll $25,$24,5 # 26
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $19,$11
- xor $25,$2,$3
- addu $7,$6
- xor $19,$16
- sll $30,$1,30
- xor $25,$1
- srl $6,$19,31
- addu $19,$19
- srl $1,$1,2
- addu $7,$18
- or $19,$6
- or $1,$30
- addu $7,$25
- xor $20,$22
- sll $25,$7,5 # 27
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $20,$12
- xor $25,$1,$2
- addu $3,$6
- xor $20,$17
- sll $30,$24,30
- xor $25,$24
- srl $6,$20,31
- addu $20,$20
- srl $24,$24,2
- addu $3,$19
- or $20,$6
- or $24,$30
- addu $3,$25
- xor $21,$23
- sll $25,$3,5 # 28
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $21,$13
- xor $25,$24,$1
- addu $2,$6
- xor $21,$18
- sll $30,$7,30
- xor $25,$7
- srl $6,$21,31
- addu $21,$21
- srl $7,$7,2
- addu $2,$20
- or $21,$6
- or $7,$30
- addu $2,$25
- xor $22,$8
- sll $25,$2,5 # 29
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $22,$14
- xor $25,$7,$24
- addu $1,$6
- xor $22,$19
- sll $30,$3,30
- xor $25,$3
- srl $6,$22,31
- addu $22,$22
- srl $3,$3,2
- addu $1,$21
- or $22,$6
- or $3,$30
- addu $1,$25
- xor $23,$9
- sll $25,$1,5 # 30
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $23,$15
- xor $25,$3,$7
- addu $24,$6
- xor $23,$20
- sll $30,$2,30
- xor $25,$2
- srl $6,$23,31
- addu $23,$23
- srl $2,$2,2
- addu $24,$22
- or $23,$6
- or $2,$30
- addu $24,$25
- xor $8,$10
- sll $25,$24,5 # 31
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $8,$16
- xor $25,$2,$3
- addu $7,$6
- xor $8,$21
- sll $30,$1,30
- xor $25,$1
- srl $6,$8,31
- addu $8,$8
- srl $1,$1,2
- addu $7,$23
- or $8,$6
- or $1,$30
- addu $7,$25
- xor $9,$11
- sll $25,$7,5 # 32
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $9,$17
- xor $25,$1,$2
- addu $3,$6
- xor $9,$22
- sll $30,$24,30
- xor $25,$24
- srl $6,$9,31
- addu $9,$9
- srl $24,$24,2
- addu $3,$8
- or $9,$6
- or $24,$30
- addu $3,$25
- xor $10,$12
- sll $25,$3,5 # 33
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $10,$18
- xor $25,$24,$1
- addu $2,$6
- xor $10,$23
- sll $30,$7,30
- xor $25,$7
- srl $6,$10,31
- addu $10,$10
- srl $7,$7,2
- addu $2,$9
- or $10,$6
- or $7,$30
- addu $2,$25
- xor $11,$13
- sll $25,$2,5 # 34
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $11,$19
- xor $25,$7,$24
- addu $1,$6
- xor $11,$8
- sll $30,$3,30
- xor $25,$3
- srl $6,$11,31
- addu $11,$11
- srl $3,$3,2
- addu $1,$10
- or $11,$6
- or $3,$30
- addu $1,$25
- xor $12,$14
- sll $25,$1,5 # 35
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $12,$20
- xor $25,$3,$7
- addu $24,$6
- xor $12,$9
- sll $30,$2,30
- xor $25,$2
- srl $6,$12,31
- addu $12,$12
- srl $2,$2,2
- addu $24,$11
- or $12,$6
- or $2,$30
- addu $24,$25
- xor $13,$15
- sll $25,$24,5 # 36
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $13,$21
- xor $25,$2,$3
- addu $7,$6
- xor $13,$10
- sll $30,$1,30
- xor $25,$1
- srl $6,$13,31
- addu $13,$13
- srl $1,$1,2
- addu $7,$12
- or $13,$6
- or $1,$30
- addu $7,$25
- xor $14,$16
- sll $25,$7,5 # 37
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $14,$22
- xor $25,$1,$2
- addu $3,$6
- xor $14,$11
- sll $30,$24,30
- xor $25,$24
- srl $6,$14,31
- addu $14,$14
- srl $24,$24,2
- addu $3,$13
- or $14,$6
- or $24,$30
- addu $3,$25
- xor $15,$17
- sll $25,$3,5 # 38
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $15,$23
- xor $25,$24,$1
- addu $2,$6
- xor $15,$12
- sll $30,$7,30
- xor $25,$7
- srl $6,$15,31
- addu $15,$15
- srl $7,$7,2
- addu $2,$14
- or $15,$6
- or $7,$30
- addu $2,$25
- xor $16,$18
- sll $25,$2,5 # 39
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $16,$8
- xor $25,$7,$24
- addu $1,$6
- xor $16,$13
- sll $30,$3,30
- xor $25,$3
- srl $6,$16,31
- addu $16,$16
- srl $3,$3,2
- addu $1,$15
- or $16,$6
- or $3,$30
- addu $1,$25
- lui $31,0x8f1b
- ori $31,0xbcdc # K_40_59
- xor $17,$19
- sll $25,$1,5 # 40
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $17,$9
- and $25,$3,$7
- addu $24,$6
- xor $17,$14
- sll $30,$2,30
- addu $24,$25
- srl $6,$17,31
- xor $25,$3,$7
- addu $17,$17
- and $25,$2
- srl $2,$2,2
- or $17,$6
- addu $24,$16
- or $2,$30
- addu $24,$25
- xor $18,$20
- sll $25,$24,5 # 41
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $18,$10
- and $25,$2,$3
- addu $7,$6
- xor $18,$15
- sll $30,$1,30
- addu $7,$25
- srl $6,$18,31
- xor $25,$2,$3
- addu $18,$18
- and $25,$1
- srl $1,$1,2
- or $18,$6
- addu $7,$17
- or $1,$30
- addu $7,$25
- xor $19,$21
- sll $25,$7,5 # 42
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $19,$11
- and $25,$1,$2
- addu $3,$6
- xor $19,$16
- sll $30,$24,30
- addu $3,$25
- srl $6,$19,31
- xor $25,$1,$2
- addu $19,$19
- and $25,$24
- srl $24,$24,2
- or $19,$6
- addu $3,$18
- or $24,$30
- addu $3,$25
- xor $20,$22
- sll $25,$3,5 # 43
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $20,$12
- and $25,$24,$1
- addu $2,$6
- xor $20,$17
- sll $30,$7,30
- addu $2,$25
- srl $6,$20,31
- xor $25,$24,$1
- addu $20,$20
- and $25,$7
- srl $7,$7,2
- or $20,$6
- addu $2,$19
- or $7,$30
- addu $2,$25
- xor $21,$23
- sll $25,$2,5 # 44
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $21,$13
- and $25,$7,$24
- addu $1,$6
- xor $21,$18
- sll $30,$3,30
- addu $1,$25
- srl $6,$21,31
- xor $25,$7,$24
- addu $21,$21
- and $25,$3
- srl $3,$3,2
- or $21,$6
- addu $1,$20
- or $3,$30
- addu $1,$25
- xor $22,$8
- sll $25,$1,5 # 45
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $22,$14
- and $25,$3,$7
- addu $24,$6
- xor $22,$19
- sll $30,$2,30
- addu $24,$25
- srl $6,$22,31
- xor $25,$3,$7
- addu $22,$22
- and $25,$2
- srl $2,$2,2
- or $22,$6
- addu $24,$21
- or $2,$30
- addu $24,$25
- xor $23,$9
- sll $25,$24,5 # 46
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $23,$15
- and $25,$2,$3
- addu $7,$6
- xor $23,$20
- sll $30,$1,30
- addu $7,$25
- srl $6,$23,31
- xor $25,$2,$3
- addu $23,$23
- and $25,$1
- srl $1,$1,2
- or $23,$6
- addu $7,$22
- or $1,$30
- addu $7,$25
- xor $8,$10
- sll $25,$7,5 # 47
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $8,$16
- and $25,$1,$2
- addu $3,$6
- xor $8,$21
- sll $30,$24,30
- addu $3,$25
- srl $6,$8,31
- xor $25,$1,$2
- addu $8,$8
- and $25,$24
- srl $24,$24,2
- or $8,$6
- addu $3,$23
- or $24,$30
- addu $3,$25
- xor $9,$11
- sll $25,$3,5 # 48
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $9,$17
- and $25,$24,$1
- addu $2,$6
- xor $9,$22
- sll $30,$7,30
- addu $2,$25
- srl $6,$9,31
- xor $25,$24,$1
- addu $9,$9
- and $25,$7
- srl $7,$7,2
- or $9,$6
- addu $2,$8
- or $7,$30
- addu $2,$25
- xor $10,$12
- sll $25,$2,5 # 49
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $10,$18
- and $25,$7,$24
- addu $1,$6
- xor $10,$23
- sll $30,$3,30
- addu $1,$25
- srl $6,$10,31
- xor $25,$7,$24
- addu $10,$10
- and $25,$3
- srl $3,$3,2
- or $10,$6
- addu $1,$9
- or $3,$30
- addu $1,$25
- xor $11,$13
- sll $25,$1,5 # 50
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $11,$19
- and $25,$3,$7
- addu $24,$6
- xor $11,$8
- sll $30,$2,30
- addu $24,$25
- srl $6,$11,31
- xor $25,$3,$7
- addu $11,$11
- and $25,$2
- srl $2,$2,2
- or $11,$6
- addu $24,$10
- or $2,$30
- addu $24,$25
- xor $12,$14
- sll $25,$24,5 # 51
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $12,$20
- and $25,$2,$3
- addu $7,$6
- xor $12,$9
- sll $30,$1,30
- addu $7,$25
- srl $6,$12,31
- xor $25,$2,$3
- addu $12,$12
- and $25,$1
- srl $1,$1,2
- or $12,$6
- addu $7,$11
- or $1,$30
- addu $7,$25
- xor $13,$15
- sll $25,$7,5 # 52
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $13,$21
- and $25,$1,$2
- addu $3,$6
- xor $13,$10
- sll $30,$24,30
- addu $3,$25
- srl $6,$13,31
- xor $25,$1,$2
- addu $13,$13
- and $25,$24
- srl $24,$24,2
- or $13,$6
- addu $3,$12
- or $24,$30
- addu $3,$25
- xor $14,$16
- sll $25,$3,5 # 53
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $14,$22
- and $25,$24,$1
- addu $2,$6
- xor $14,$11
- sll $30,$7,30
- addu $2,$25
- srl $6,$14,31
- xor $25,$24,$1
- addu $14,$14
- and $25,$7
- srl $7,$7,2
- or $14,$6
- addu $2,$13
- or $7,$30
- addu $2,$25
- xor $15,$17
- sll $25,$2,5 # 54
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $15,$23
- and $25,$7,$24
- addu $1,$6
- xor $15,$12
- sll $30,$3,30
- addu $1,$25
- srl $6,$15,31
- xor $25,$7,$24
- addu $15,$15
- and $25,$3
- srl $3,$3,2
- or $15,$6
- addu $1,$14
- or $3,$30
- addu $1,$25
- xor $16,$18
- sll $25,$1,5 # 55
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $16,$8
- and $25,$3,$7
- addu $24,$6
- xor $16,$13
- sll $30,$2,30
- addu $24,$25
- srl $6,$16,31
- xor $25,$3,$7
- addu $16,$16
- and $25,$2
- srl $2,$2,2
- or $16,$6
- addu $24,$15
- or $2,$30
- addu $24,$25
- xor $17,$19
- sll $25,$24,5 # 56
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $17,$9
- and $25,$2,$3
- addu $7,$6
- xor $17,$14
- sll $30,$1,30
- addu $7,$25
- srl $6,$17,31
- xor $25,$2,$3
- addu $17,$17
- and $25,$1
- srl $1,$1,2
- or $17,$6
- addu $7,$16
- or $1,$30
- addu $7,$25
- xor $18,$20
- sll $25,$7,5 # 57
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $18,$10
- and $25,$1,$2
- addu $3,$6
- xor $18,$15
- sll $30,$24,30
- addu $3,$25
- srl $6,$18,31
- xor $25,$1,$2
- addu $18,$18
- and $25,$24
- srl $24,$24,2
- or $18,$6
- addu $3,$17
- or $24,$30
- addu $3,$25
- xor $19,$21
- sll $25,$3,5 # 58
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $19,$11
- and $25,$24,$1
- addu $2,$6
- xor $19,$16
- sll $30,$7,30
- addu $2,$25
- srl $6,$19,31
- xor $25,$24,$1
- addu $19,$19
- and $25,$7
- srl $7,$7,2
- or $19,$6
- addu $2,$18
- or $7,$30
- addu $2,$25
- xor $20,$22
- sll $25,$2,5 # 59
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $20,$12
- and $25,$7,$24
- addu $1,$6
- xor $20,$17
- sll $30,$3,30
- addu $1,$25
- srl $6,$20,31
- xor $25,$7,$24
- addu $20,$20
- and $25,$3
- srl $3,$3,2
- or $20,$6
- addu $1,$19
- or $3,$30
- addu $1,$25
- lui $31,0xca62
- ori $31,0xc1d6 # K_60_79
- xor $21,$23
- sll $25,$1,5 # 60
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $21,$13
- xor $25,$3,$7
- addu $24,$6
- xor $21,$18
- sll $30,$2,30
- xor $25,$2
- srl $6,$21,31
- addu $21,$21
- srl $2,$2,2
- addu $24,$20
- or $21,$6
- or $2,$30
- addu $24,$25
- xor $22,$8
- sll $25,$24,5 # 61
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $22,$14
- xor $25,$2,$3
- addu $7,$6
- xor $22,$19
- sll $30,$1,30
- xor $25,$1
- srl $6,$22,31
- addu $22,$22
- srl $1,$1,2
- addu $7,$21
- or $22,$6
- or $1,$30
- addu $7,$25
- xor $23,$9
- sll $25,$7,5 # 62
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $23,$15
- xor $25,$1,$2
- addu $3,$6
- xor $23,$20
- sll $30,$24,30
- xor $25,$24
- srl $6,$23,31
- addu $23,$23
- srl $24,$24,2
- addu $3,$22
- or $23,$6
- or $24,$30
- addu $3,$25
- xor $8,$10
- sll $25,$3,5 # 63
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $8,$16
- xor $25,$24,$1
- addu $2,$6
- xor $8,$21
- sll $30,$7,30
- xor $25,$7
- srl $6,$8,31
- addu $8,$8
- srl $7,$7,2
- addu $2,$23
- or $8,$6
- or $7,$30
- addu $2,$25
- xor $9,$11
- sll $25,$2,5 # 64
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $9,$17
- xor $25,$7,$24
- addu $1,$6
- xor $9,$22
- sll $30,$3,30
- xor $25,$3
- srl $6,$9,31
- addu $9,$9
- srl $3,$3,2
- addu $1,$8
- or $9,$6
- or $3,$30
- addu $1,$25
- xor $10,$12
- sll $25,$1,5 # 65
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $10,$18
- xor $25,$3,$7
- addu $24,$6
- xor $10,$23
- sll $30,$2,30
- xor $25,$2
- srl $6,$10,31
- addu $10,$10
- srl $2,$2,2
- addu $24,$9
- or $10,$6
- or $2,$30
- addu $24,$25
- xor $11,$13
- sll $25,$24,5 # 66
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $11,$19
- xor $25,$2,$3
- addu $7,$6
- xor $11,$8
- sll $30,$1,30
- xor $25,$1
- srl $6,$11,31
- addu $11,$11
- srl $1,$1,2
- addu $7,$10
- or $11,$6
- or $1,$30
- addu $7,$25
- xor $12,$14
- sll $25,$7,5 # 67
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $12,$20
- xor $25,$1,$2
- addu $3,$6
- xor $12,$9
- sll $30,$24,30
- xor $25,$24
- srl $6,$12,31
- addu $12,$12
- srl $24,$24,2
- addu $3,$11
- or $12,$6
- or $24,$30
- addu $3,$25
- xor $13,$15
- sll $25,$3,5 # 68
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $13,$21
- xor $25,$24,$1
- addu $2,$6
- xor $13,$10
- sll $30,$7,30
- xor $25,$7
- srl $6,$13,31
- addu $13,$13
- srl $7,$7,2
- addu $2,$12
- or $13,$6
- or $7,$30
- addu $2,$25
- xor $14,$16
- sll $25,$2,5 # 69
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $14,$22
- xor $25,$7,$24
- addu $1,$6
- xor $14,$11
- sll $30,$3,30
- xor $25,$3
- srl $6,$14,31
- addu $14,$14
- srl $3,$3,2
- addu $1,$13
- or $14,$6
- or $3,$30
- addu $1,$25
- xor $15,$17
- sll $25,$1,5 # 70
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $15,$23
- xor $25,$3,$7
- addu $24,$6
- xor $15,$12
- sll $30,$2,30
- xor $25,$2
- srl $6,$15,31
- addu $15,$15
- srl $2,$2,2
- addu $24,$14
- or $15,$6
- or $2,$30
- addu $24,$25
- xor $16,$18
- sll $25,$24,5 # 71
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $16,$8
- xor $25,$2,$3
- addu $7,$6
- xor $16,$13
- sll $30,$1,30
- xor $25,$1
- srl $6,$16,31
- addu $16,$16
- srl $1,$1,2
- addu $7,$15
- or $16,$6
- or $1,$30
- addu $7,$25
- xor $17,$19
- sll $25,$7,5 # 72
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $17,$9
- xor $25,$1,$2
- addu $3,$6
- xor $17,$14
- sll $30,$24,30
- xor $25,$24
- srl $6,$17,31
- addu $17,$17
- srl $24,$24,2
- addu $3,$16
- or $17,$6
- or $24,$30
- addu $3,$25
- xor $18,$20
- sll $25,$3,5 # 73
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $18,$10
- xor $25,$24,$1
- addu $2,$6
- xor $18,$15
- sll $30,$7,30
- xor $25,$7
- srl $6,$18,31
- addu $18,$18
- srl $7,$7,2
- addu $2,$17
- or $18,$6
- or $7,$30
- addu $2,$25
- xor $19,$21
- sll $25,$2,5 # 74
- addu $1,$31
- srl $6,$2,27
- addu $1,$25
- xor $19,$11
- xor $25,$7,$24
- addu $1,$6
- xor $19,$16
- sll $30,$3,30
- xor $25,$3
- srl $6,$19,31
- addu $19,$19
- srl $3,$3,2
- addu $1,$18
- or $19,$6
- or $3,$30
- addu $1,$25
- xor $20,$22
- sll $25,$1,5 # 75
- addu $24,$31
- srl $6,$1,27
- addu $24,$25
- xor $20,$12
- xor $25,$3,$7
- addu $24,$6
- xor $20,$17
- sll $30,$2,30
- xor $25,$2
- srl $6,$20,31
- addu $20,$20
- srl $2,$2,2
- addu $24,$19
- or $20,$6
- or $2,$30
- addu $24,$25
- xor $21,$23
- sll $25,$24,5 # 76
- addu $7,$31
- srl $6,$24,27
- addu $7,$25
- xor $21,$13
- xor $25,$2,$3
- addu $7,$6
- xor $21,$18
- sll $30,$1,30
- xor $25,$1
- srl $6,$21,31
- addu $21,$21
- srl $1,$1,2
- addu $7,$20
- or $21,$6
- or $1,$30
- addu $7,$25
- xor $22,$8
- sll $25,$7,5 # 77
- addu $3,$31
- srl $6,$7,27
- addu $3,$25
- xor $22,$14
- xor $25,$1,$2
- addu $3,$6
- xor $22,$19
- sll $30,$24,30
- xor $25,$24
- srl $6,$22,31
- addu $22,$22
- srl $24,$24,2
- addu $3,$21
- or $22,$6
- or $24,$30
- addu $3,$25
- xor $23,$9
- sll $25,$3,5 # 78
- addu $2,$31
- srl $6,$3,27
- addu $2,$25
- xor $23,$15
- xor $25,$24,$1
- addu $2,$6
- xor $23,$20
- sll $30,$7,30
- xor $25,$7
- srl $6,$23,31
- addu $23,$23
- srl $7,$7,2
- addu $2,$22
- or $23,$6
- or $7,$30
- addu $2,$25
- lw $8,0($4)
- sll $25,$2,5 # 79
- addu $1,$31
- lw $9,4($4)
- srl $6,$2,27
- addu $1,$25
- lw $10,8($4)
- xor $25,$7,$24
- addu $1,$6
- lw $11,12($4)
- sll $30,$3,30
- xor $25,$3
- lw $12,16($4)
- srl $3,$3,2
- addu $1,$23
- or $3,$30
- addu $1,$25
- add $5,64
- lw $6,0($29)
-
- addu $1,$8
- addu $2,$9
- sw $1,0($4)
- addu $3,$10
- addu $7,$11
- sw $2,4($4)
- addu $24,$12
- sw $3,8($4)
- sw $7,12($4)
- sw $24,16($4)
- .set noreorder
- bne $5,$6,.Loop
- nop
-
- .set noreorder
- lw $31,(16-1)*4($29)
- lw $30,(16-2)*4($29)
- lw $23,(16-3)*4($29)
- lw $22,(16-4)*4($29)
- lw $21,(16-5)*4($29)
- lw $20,(16-6)*4($29)
- lw $19,(16-7)*4($29)
- lw $18,(16-8)*4($29)
- lw $17,(16-9)*4($29)
- lw $16,(16-10)*4($29)
- jr $31
- add $29,16*4
-.end sha1_block_data_order
-.rdata
-.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro@openssl.org>"
diff --git a/app/openssl/crypto/sha/asm/sha1-mips.pl b/app/openssl/crypto/sha/asm/sha1-mips.pl
deleted file mode 100644
index f1a702f3..00000000
--- a/app/openssl/crypto/sha/asm/sha1-mips.pl
+++ /dev/null
@@ -1,354 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA1 block procedure for MIPS.
-
-# The performance improvement is 30% on unaligned input. The "secret" is
-# to deploy the lwl/lwr pair to load unaligned input. One could have
-# vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
-# compatible subroutine. There is room for minor optimization on
-# little-endian platforms...
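-#
-# For reference, the unaligned-load idiom itself, as an illustrative
-# sketch (not from the original file): on a big-endian MIPS32 target
-#
-#	lwl	$t0,0($a1)	# fill from the most significant byte
-#	lwr	$t0,3($a1)	# fill from the least significant byte
-#
-# assembles one 32-bit word from an arbitrary byte address; BODY_00_14
-# below emits exactly this pattern, with $MSB/$LSB picked by endianness.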
-
-######################################################################
-# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
-# widely used. Then there is a new contender: NUBI. It appears that if
-# one picks the latter, it's possible to arrange code in an ABI-neutral
-# manner. Therefore let's stick to the NUBI register layout:
-#
-($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-#
-# The return value is placed in $a0. Following coding rules facilitate
-# interoperability:
-#
-# - never ever touch $tp, "thread pointer", former $gp;
-# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-# old code];
-# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-#
-# For reference here is register layout for N32/64 MIPS ABIs:
-#
-# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-#
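-# Usage sketch (an illustration, not from the original file): the first
-# argument selects the ABI flavour, and a filename-looking argument, if
-# any, becomes the output file, e.g.
-#
-#	perl sha1-mips.pl o32 sha1-o32.s
-#	perl sha1-mips.pl 64 sha1-n64.s
-#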
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-
-if ($flavour =~ /64|n32/i) {
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
- $REG_S="sd";
- $REG_L="ld";
- $PTR_SLL="dsll"; # incidentally works even on n32
- $SZREG=8;
-} else {
- $PTR_ADD="add";
- $PTR_SUB="sub";
- $REG_S="sw";
- $REG_L="lw";
- $PTR_SLL="sll";
- $SZREG=4;
-}
-#
-# <appro@openssl.org>
-#
-######################################################################
-
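-# Endianness probe (explanatory note on the line below): if the C
-# compiler predefines MIPSEL, the preprocessor rewrites the token and the
-# match fails, so $big_endian ends up 0 (little-endian); if the token
-# survives preprocessing, $big_endian is set to 1.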
-$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-
-for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
-open STDOUT,">$output";
-
-if (!defined($big_endian))
- { $big_endian=(unpack('L',pack('N',1))==1); }
-
-# offsets of the Most and Least Significant Bytes
-$MSB=$big_endian?0:3;
-$LSB=3&~$MSB;
-
-@X=map("\$$_",(8..23)); # a4-a7,s0-s11
-
-$ctx=$a0;
-$inp=$a1;
-$num=$a2;
-$A="\$1";
-$B="\$2";
-$C="\$3";
-$D="\$7";
-$E="\$24"; @V=($A,$B,$C,$D,$E);
-$t0="\$25";
-$t1=$num; # $num is offloaded to stack
-$t2="\$30"; # fp
-$K="\$31"; # ra
-
-sub BODY_00_14 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if (!$big_endian);
- srl $t0,@X[$i],24 # byte swap($i)
- srl $t1,@X[$i],8
- andi $t2,@X[$i],0xFF00
- sll @X[$i],@X[$i],24
- andi $t1,0xFF00
- sll $t2,$t2,8
- or @X[$i],$t0
- or $t1,$t2
- or @X[$i],$t1
-___
-$code.=<<___;
- lwl @X[$j],$j*4+$MSB($inp)
- sll $t0,$a,5 # $i
- addu $e,$K
- lwr @X[$j],$j*4+$LSB($inp)
- srl $t1,$a,27
- addu $e,$t0
- xor $t0,$c,$d
- addu $e,$t1
- sll $t2,$b,30
- and $t0,$b
- srl $b,$b,2
- xor $t0,$d
- addu $e,@X[$i]
- or $b,$t2
- addu $e,$t0
-___
-}
-
-sub BODY_15_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-
-$code.=<<___ if (!$big_endian && $i==15);
- srl $t0,@X[$i],24 # byte swap($i)
- srl $t1,@X[$i],8
- andi $t2,@X[$i],0xFF00
- sll @X[$i],@X[$i],24
- andi $t1,0xFF00
- sll $t2,$t2,8
- or @X[$i],$t0
- or @X[$i],$t1
- or @X[$i],$t2
-___
-$code.=<<___;
- xor @X[$j%16],@X[($j+2)%16]
- sll $t0,$a,5 # $i
- addu $e,$K
- srl $t1,$a,27
- addu $e,$t0
- xor @X[$j%16],@X[($j+8)%16]
- xor $t0,$c,$d
- addu $e,$t1
- xor @X[$j%16],@X[($j+13)%16]
- sll $t2,$b,30
- and $t0,$b
- srl $t1,@X[$j%16],31
- addu @X[$j%16],@X[$j%16]
- srl $b,$b,2
- xor $t0,$d
- or @X[$j%16],$t1
- addu $e,@X[$i%16]
- or $b,$t2
- addu $e,$t0
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<79);
- xor @X[$j%16],@X[($j+2)%16]
- sll $t0,$a,5 # $i
- addu $e,$K
- srl $t1,$a,27
- addu $e,$t0
- xor @X[$j%16],@X[($j+8)%16]
- xor $t0,$c,$d
- addu $e,$t1
- xor @X[$j%16],@X[($j+13)%16]
- sll $t2,$b,30
- xor $t0,$b
- srl $t1,@X[$j%16],31
- addu @X[$j%16],@X[$j%16]
- srl $b,$b,2
- addu $e,@X[$i%16]
- or @X[$j%16],$t1
- or $b,$t2
- addu $e,$t0
-___
-$code.=<<___ if ($i==79);
- lw @X[0],0($ctx)
- sll $t0,$a,5 # $i
- addu $e,$K
- lw @X[1],4($ctx)
- srl $t1,$a,27
- addu $e,$t0
- lw @X[2],8($ctx)
- xor $t0,$c,$d
- addu $e,$t1
- lw @X[3],12($ctx)
- sll $t2,$b,30
- xor $t0,$b
- lw @X[4],16($ctx)
- srl $b,$b,2
- addu $e,@X[$i%16]
- or $b,$t2
- addu $e,$t0
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<79);
- xor @X[$j%16],@X[($j+2)%16]
- sll $t0,$a,5 # $i
- addu $e,$K
- srl $t1,$a,27
- addu $e,$t0
- xor @X[$j%16],@X[($j+8)%16]
- and $t0,$c,$d
- addu $e,$t1
- xor @X[$j%16],@X[($j+13)%16]
- sll $t2,$b,30
- addu $e,$t0
- srl $t1,@X[$j%16],31
- xor $t0,$c,$d
- addu @X[$j%16],@X[$j%16]
- and $t0,$b
- srl $b,$b,2
- or @X[$j%16],$t1
- addu $e,@X[$i%16]
- or $b,$t2
- addu $e,$t0
-___
-}
-
-$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-
-$code=<<___;
-#ifdef OPENSSL_FIPSCANISTER
-# include <openssl/fipssyms.h>
-#endif
-
-.text
-
-.set noat
-.set noreorder
-.align 5
-.globl sha1_block_data_order
-.ent sha1_block_data_order
-sha1_block_data_order:
- .frame $sp,$FRAMESIZE*$SZREG,$ra
- .mask $SAVED_REGS_MASK,-$SZREG
- .set noreorder
- $PTR_SUB $sp,$FRAMESIZE*$SZREG
- $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
- $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
- $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
- $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
- $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
- $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
- $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
- $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
- $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
- $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
-___
-$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
- $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
- $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
- $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
- $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
- $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
-___
-$code.=<<___;
- $PTR_SLL $num,6
- $PTR_ADD $num,$inp
- $REG_S $num,0($sp)
- lw $A,0($ctx)
- lw $B,4($ctx)
- lw $C,8($ctx)
- lw $D,12($ctx)
- b .Loop
- lw $E,16($ctx)
-.align 4
-.Loop:
- .set reorder
- lwl @X[0],$MSB($inp)
- lui $K,0x5a82
- lwr @X[0],$LSB($inp)
- ori $K,0x7999 # K_00_19
-___
-for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
-for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- lui $K,0x6ed9
- ori $K,0xeba1 # K_20_39
-___
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- lui $K,0x8f1b
- ori $K,0xbcdc # K_40_59
-___
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- lui $K,0xca62
- ori $K,0xc1d6 # K_60_79
-___
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- $PTR_ADD $inp,64
- $REG_L $num,0($sp)
-
- addu $A,$X[0]
- addu $B,$X[1]
- sw $A,0($ctx)
- addu $C,$X[2]
- addu $D,$X[3]
- sw $B,4($ctx)
- addu $E,$X[4]
- sw $C,8($ctx)
- sw $D,12($ctx)
- sw $E,16($ctx)
- .set noreorder
- bne $inp,$num,.Loop
- nop
-
- .set noreorder
- $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
- $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
- $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
- $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
- $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
- $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
- $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
- $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
- $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
- $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
-___
-$code.=<<___ if ($flavour =~ /nubi/i);
- $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
- $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
- $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
- $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
- $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
-___
-$code.=<<___;
- jr $ra
- $PTR_ADD $sp,$FRAMESIZE*$SZREG
-.end sha1_block_data_order
-.rdata
-.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-___
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-parisc.pl b/app/openssl/crypto/sha/asm/sha1-parisc.pl
deleted file mode 100644
index 6e5a328a..00000000
--- a/app/openssl/crypto/sha/asm/sha1-parisc.pl
+++ /dev/null
@@ -1,260 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA1 block procedure for PA-RISC.
-
-# June 2009.
-#
-# On PA-7100LC performance is >30% better than gcc 3.2 generated code
-# for aligned input and >50% better for unaligned. Compared to the vendor
-# compiler on PA-8600 it's almost 60% faster in a 64-bit build and just a
-# few percent faster in a 32-bit one (this is for aligned input; data for
-# unaligned input are not available).
-#
-# Special thanks to polarhome.com for providing HP-UX account.
-
-$flavour = shift;
-$output = shift;
-open STDOUT,">$output";
-
-if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
-} else {
- $LEVEL ="1.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
-}
-
-$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
- # [+ argument transfer]
-$ctx="%r26"; # arg0
-$inp="%r25"; # arg1
-$num="%r24"; # arg2
-
-$t0="%r28";
-$t1="%r29";
-$K="%r31";
-
-@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
- "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);
-
-@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<15);
- addl $K,$e,$e ; $i
- shd $a,$a,27,$t1
- addl @X[$i],$e,$e
- and $c,$b,$t0
- addl $t1,$e,$e
- andcm $d,$b,$t1
- shd $b,$b,2,$b
- or $t1,$t0,$t0
- addl $t0,$e,$e
-___
-$code.=<<___ if ($i>=15); # with forward Xupdate
- addl $K,$e,$e ; $i
- shd $a,$a,27,$t1
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
- addl @X[$i%16],$e,$e
- and $c,$b,$t0
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- addl $t1,$e,$e
- andcm $d,$b,$t1
- shd $b,$b,2,$b
- or $t1,$t0,$t0
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- add $t0,$e,$e
- shd @X[$j%16],@X[$j%16],31,@X[$j%16]
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<79);
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i
- addl $K,$e,$e
- shd $a,$a,27,$t1
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- addl @X[$i%16],$e,$e
- xor $b,$c,$t0
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- addl $t1,$e,$e
- shd $b,$b,2,$b
- xor $d,$t0,$t0
- shd @X[$j%16],@X[$j%16],31,@X[$j%16]
- addl $t0,$e,$e
-___
-$code.=<<___ if ($i==79); # with context load
- ldw 0($ctx),@X[0] ; $i
- addl $K,$e,$e
- shd $a,$a,27,$t1
- ldw 4($ctx),@X[1]
- addl @X[$i%16],$e,$e
- xor $b,$c,$t0
- ldw 8($ctx),@X[2]
- addl $t1,$e,$e
- shd $b,$b,2,$b
- xor $d,$t0,$t0
- ldw 12($ctx),@X[3]
- addl $t0,$e,$e
- ldw 16($ctx),@X[4]
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___;
- shd $a,$a,27,$t1 ; $i
- addl $K,$e,$e
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
- xor $d,$c,$t0
- addl @X[$i%16],$e,$e
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- and $b,$t0,$t0
- addl $t1,$e,$e
- shd $b,$b,2,$b
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- addl $t0,$e,$e
- and $d,$c,$t1
- shd @X[$j%16],@X[$j%16],31,@X[$j%16]
- addl $t1,$e,$e
-___
-}
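The three BODY_* generators above unroll the standard SHA-1 round: only the
boolean function F and the constant K change every 20 rounds (the 40..59
rounds split Maj across two separate adds, which is valid because those two
terms never overlap bit-wise). A minimal pure-Perl sketch of the round they
emit, for reference only; all arithmetic is mod 2**32:

use strict; use warnings;

sub rol32 { my ($x,$n) = @_; (($x << $n) | ($x >> (32 - $n))) & 0xffffffff }

sub sha1_round {
    my ($i, $a, $b, $c, $d, $e, $xi, $k) = @_;
    my $f = $i < 20 ? ($b & $c) | (~$b & $d & 0xffffffff)  # Ch: and/andcm/or
          : $i < 40 ? $b ^ $c ^ $d                         # Parity
          : $i < 60 ? ($b & $c) | ($b & $d) | ($c & $d)    # Maj
          :           $b ^ $c ^ $d;                        # Parity again
    my $t = (rol32($a, 5) + $f + $e + $k + $xi) & 0xffffffff;
    return ($t, $a, rol32($b, 30), $c, $d);                # new (a,b,c,d,e)
}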
-
-$code=<<___;
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
-
- .EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
-sha1_block_data_order
- .PROC
- .CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
- $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
- $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
- $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
- $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
- $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
-
- ldw 0($ctx),$A
- ldw 4($ctx),$B
- ldw 8($ctx),$C
- ldw 12($ctx),$D
- ldw 16($ctx),$E
-
- extru $inp,31,2,$t0 ; t0=inp&3;
- sh3addl $t0,%r0,$t0 ; t0*=8;
- subi 32,$t0,$t0 ; t0=32-t0;
- mtctl $t0,%cr11 ; %sar=t0;
-
-L\$oop
- ldi 3,$t0
- andcm $inp,$t0,$t0 ; 64-bit neutral
-___
- for ($i=0;$i<15;$i++) { # load input block
- $code.="\tldw `4*$i`($t0),@X[$i]\n"; }
-$code.=<<___;
- cmpb,*= $inp,$t0,L\$aligned
- ldw 60($t0),@X[15]
- ldw 64($t0),@X[16]
-___
- for ($i=0;$i<16;$i++) { # align input
- $code.="\tvshd @X[$i],@X[$i+1],@X[$i]\n"; }
-$code.=<<___;
-L\$aligned
- ldil L'0x5a827000,$K ; K_00_19
- ldo 0x999($K),$K
-___
-for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- ldil L'0x6ed9e000,$K ; K_20_39
- ldo 0xba1($K),$K
-___
-
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- ldil L'0x8f1bb000,$K ; K_40_59
- ldo 0xcdc($K),$K
-___
-
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- ldil L'0xca62c000,$K ; K_60_79
- ldo 0x1d6($K),$K
-___
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- addl @X[0],$A,$A
- addl @X[1],$B,$B
- addl @X[2],$C,$C
- addl @X[3],$D,$D
- addl @X[4],$E,$E
- stw $A,0($ctx)
- stw $B,4($ctx)
- stw $C,8($ctx)
- stw $D,12($ctx)
- stw $E,16($ctx)
- addib,*<> -1,$num,L\$oop
- ldo 64($inp),$inp
-
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
- $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
- $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
- $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
- $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
- $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
- .STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/,\*/,/gm if ($SIZE_T==4);
-$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-ppc.pl b/app/openssl/crypto/sha/asm/sha1-ppc.pl
deleted file mode 100755
index 2140dd2f..00000000
--- a/app/openssl/crypto/sha/asm/sha1-ppc.pl
+++ /dev/null
@@ -1,326 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# I let hardware handle unaligned input(*), except on page boundaries
-# (see below for details). Otherwise it is a straightforward
-# implementation with the X vector in the register bank. The module is
-# big-endian [which is not a big deal, as there are no little-endian
-# targets left around].
-#
-# (*) does this mean that this module is inappropriate for PPC403?
-# Does anybody know if pre-POWER3 can sustain an unaligned load?
-
-# -m64 -m32
-# ----------------------------------
-# PPC970,gcc-4.0.0 +76% +59%
-# Power6,xlc-7 +68% +33%
-
-$flavour = shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T =8;
- $LRSAVE =2*$SIZE_T;
- $UCMP ="cmpld";
- $STU ="stdu";
- $POP ="ld";
- $PUSH ="std";
-} elsif ($flavour =~ /32/) {
- $SIZE_T =4;
- $LRSAVE =$SIZE_T;
- $UCMP ="cmplw";
- $STU ="stwu";
- $POP ="lwz";
- $PUSH ="stw";
-} else { die "nonsense $flavour"; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
-
-$FRAME=24*$SIZE_T+64;
-$LOCALS=6*$SIZE_T;
-
-$K ="r0";
-$sp ="r1";
-$toc="r2";
-$ctx="r3";
-$inp="r4";
-$num="r5";
-$t0 ="r15";
-$t1 ="r6";
-
-$A ="r7";
-$B ="r8";
-$C ="r9";
-$D ="r10";
-$E ="r11";
-$T ="r12";
-
-@V=($A,$B,$C,$D,$E,$T);
-@X=("r16","r17","r18","r19","r20","r21","r22","r23",
- "r24","r25","r26","r27","r28","r29","r30","r31");
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e,$f)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i==0);
- lwz @X[$i],`$i*4`($inp)
-___
-$code.=<<___ if ($i<15);
- lwz @X[$j],`$j*4`($inp)
- add $f,$K,$e
- rotlwi $e,$a,5
- add $f,$f,@X[$i]
- and $t0,$c,$b
- add $f,$f,$e
- andc $t1,$d,$b
- rotlwi $b,$b,30
- or $t0,$t0,$t1
- add $f,$f,$t0
-___
-$code.=<<___ if ($i>=15);
- add $f,$K,$e
- rotlwi $e,$a,5
- xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
- add $f,$f,@X[$i%16]
- and $t0,$c,$b
- xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
- add $f,$f,$e
- andc $t1,$d,$b
- rotlwi $b,$b,30
- or $t0,$t0,$t1
- xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
- add $f,$f,$t0
- rotlwi @X[$j%16],@X[$j%16],1
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e,$f)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i<79);
- add $f,$K,$e
- rotlwi $e,$a,5
- xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
- add $f,$f,@X[$i%16]
- xor $t0,$b,$c
- xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
- add $f,$f,$e
- rotlwi $b,$b,30
- xor $t0,$t0,$d
- xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
- add $f,$f,$t0
- rotlwi @X[$j%16],@X[$j%16],1
-___
-$code.=<<___ if ($i==79);
- add $f,$K,$e
- rotlwi $e,$a,5
- lwz r16,0($ctx)
- add $f,$f,@X[$i%16]
- xor $t0,$b,$c
- lwz r17,4($ctx)
- add $f,$f,$e
- rotlwi $b,$b,30
- lwz r18,8($ctx)
- xor $t0,$t0,$d
- lwz r19,12($ctx)
- add $f,$f,$t0
- lwz r20,16($ctx)
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e,$f)=@_;
-my $j=$i+1;
-$code.=<<___;
- add $f,$K,$e
- rotlwi $e,$a,5
- xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
- add $f,$f,@X[$i%16]
- and $t0,$b,$c
- xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
- add $f,$f,$e
- or $t1,$b,$c
- rotlwi $b,$b,30
- xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
- and $t1,$t1,$d
- or $t0,$t0,$t1
- rotlwi @X[$j%16],@X[$j%16],1
- add $f,$f,$t0
-___
-}
-
-$code=<<___;
-.machine "any"
-.text
-
-.globl .sha1_block_data_order
-.align 4
-.sha1_block_data_order:
- $STU $sp,-$FRAME($sp)
- mflr r0
- $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
- $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
- $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
- $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
- $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
- $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
- $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
- $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
- $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
- $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
- $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
- $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
- $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
- $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
- $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
- $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
- $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
- $PUSH r0,`$FRAME+$LRSAVE`($sp)
- lwz $A,0($ctx)
- lwz $B,4($ctx)
- lwz $C,8($ctx)
- lwz $D,12($ctx)
- lwz $E,16($ctx)
- andi. r0,$inp,3
- bne Lunaligned
-Laligned:
- mtctr $num
- bl Lsha1_block_private
- b Ldone
-
-; The PowerPC specification allows an implementation to be ill-behaved
-; upon an unaligned access which crosses a page boundary. The "better
-; safe than sorry" principle makes me treat it specially. But I don't
-; look for the particular offending word, but rather for the 64-byte
-; input block which crosses the boundary. Once found, that block is
-; aligned and hashed separately...
-.align 4
-Lunaligned:
- subfic $t1,$inp,4096
- andi. $t1,$t1,4095 ; distance to closest page boundary
- srwi. $t1,$t1,6 ; t1/=64
- beq Lcross_page
- $UCMP $num,$t1
- ble- Laligned ; didn't cross the page boundary
- mtctr $t1
- subfc $num,$t1,$num
- bl Lsha1_block_private
-Lcross_page:
- li $t1,16
- mtctr $t1
- addi r20,$sp,$LOCALS ; spot within the frame
-Lmemcpy:
- lbz r16,0($inp)
- lbz r17,1($inp)
- lbz r18,2($inp)
- lbz r19,3($inp)
- addi $inp,$inp,4
- stb r16,0(r20)
- stb r17,1(r20)
- stb r18,2(r20)
- stb r19,3(r20)
- addi r20,r20,4
- bdnz Lmemcpy
-
- $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
- li $t1,1
- addi $inp,$sp,$LOCALS
- mtctr $t1
- bl Lsha1_block_private
- $POP $inp,`$FRAME-$SIZE_T*18`($sp)
- addic. $num,$num,-1
- bne- Lunaligned
-
-Ldone:
- $POP r0,`$FRAME+$LRSAVE`($sp)
- $POP r15,`$FRAME-$SIZE_T*17`($sp)
- $POP r16,`$FRAME-$SIZE_T*16`($sp)
- $POP r17,`$FRAME-$SIZE_T*15`($sp)
- $POP r18,`$FRAME-$SIZE_T*14`($sp)
- $POP r19,`$FRAME-$SIZE_T*13`($sp)
- $POP r20,`$FRAME-$SIZE_T*12`($sp)
- $POP r21,`$FRAME-$SIZE_T*11`($sp)
- $POP r22,`$FRAME-$SIZE_T*10`($sp)
- $POP r23,`$FRAME-$SIZE_T*9`($sp)
- $POP r24,`$FRAME-$SIZE_T*8`($sp)
- $POP r25,`$FRAME-$SIZE_T*7`($sp)
- $POP r26,`$FRAME-$SIZE_T*6`($sp)
- $POP r27,`$FRAME-$SIZE_T*5`($sp)
- $POP r28,`$FRAME-$SIZE_T*4`($sp)
- $POP r29,`$FRAME-$SIZE_T*3`($sp)
- $POP r30,`$FRAME-$SIZE_T*2`($sp)
- $POP r31,`$FRAME-$SIZE_T*1`($sp)
- mtlr r0
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,18,3,0
- .long 0
-___
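The Lunaligned logic above reduces to simple arithmetic: hash as many whole
64-byte blocks as fit before the closest 4096-byte page boundary, then copy
the one block that straddles it into the stack frame and hash that copy. A
hedged Perl sketch of just the counting step (the subroutine name and its
arguments are illustrative, not part of the module):

# How many whole 64-byte blocks fit before the next page boundary;
# 0 means the very next block crosses it and must be copied aside.
sub blocks_before_page_boundary {
    my ($inp, $num) = @_;              # input address, total block count
    my $dist = (4096 - $inp) % 4096;   # matches the subfic/andi. pair
    my $safe = $dist >> 6;             # matches srwi. $t1,$t1,6
    return $safe < $num ? $safe : $num;
}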
-
-# This is the private block function, which uses a tailored calling
-# interface: upon entry the SHA_CTX is pre-loaded into the given
-# registers and the counter register contains the number of chunks to
-# digest...
-$code.=<<___;
-.align 4
-Lsha1_block_private:
-___
-$code.=<<___; # load K_00_19
- lis $K,0x5a82
- ori $K,$K,0x7999
-___
-for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___; # load K_20_39
- lis $K,0x6ed9
- ori $K,$K,0xeba1
-___
-for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___; # load K_40_59
- lis $K,0x8f1b
- ori $K,$K,0xbcdc
-___
-for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___; # load K_60_79
- lis $K,0xca62
- ori $K,$K,0xc1d6
-___
-for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- add r16,r16,$E
- add r17,r17,$T
- add r18,r18,$A
- add r19,r19,$B
- add r20,r20,$C
- stw r16,0($ctx)
- mr $A,r16
- stw r17,4($ctx)
- mr $B,r17
- stw r18,8($ctx)
- mr $C,r18
- stw r19,12($ctx)
- mr $D,r19
- stw r20,16($ctx)
- mr $E,r20
- addi $inp,$inp,`16*4`
- bdnz- Lsha1_block_private
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-$code.=<<___;
-.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-s390x.pl b/app/openssl/crypto/sha/asm/sha1-s390x.pl
deleted file mode 100644
index 9193dda4..00000000
--- a/app/openssl/crypto/sha/asm/sha1-s390x.pl
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA1 block procedure for s390x.
-
-# April 2007.
-#
-# Performance is >30% better than gcc 3.3 generated code. But the real
-# twist is that SHA1 hardware support is detected and utilized, in
-# which case performance can reach a further >4.5x for larger chunks.
-
-# January 2009.
-#
-# Optimize Xupdate for the number of memory references and reschedule
-# instructions to favour the dual-issue z10 pipeline. On z10, hardware
-# is "only" ~2.3x faster than software.
-
-# November 2010.
-#
-# Adapt for -m31 build. If the kernel supports what's called the
-# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to
-# use 64-bit instructions and achieve "64-bit" performance even in a
-# 31-bit legacy application context. The feature is not specific to
-# any particular processor, as long as it's a "z-CPU". The latter
-# implies that the code remains z/Architecture specific.
-
-$kimdfunc=1; # magic function code for kimd instruction
-
-$flavour = shift;
-
-if ($flavour =~ /3[12]/) {
- $SIZE_T=4;
- $g="";
-} else {
- $SIZE_T=8;
- $g="g";
-}
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-$K_00_39="%r0"; $K=$K_00_39;
-$K_40_79="%r1";
-$ctx="%r2"; $prefetch="%r2";
-$inp="%r3";
-$len="%r4";
-
-$A="%r5";
-$B="%r6";
-$C="%r7";
-$D="%r8";
-$E="%r9"; @V=($A,$B,$C,$D,$E);
-$t0="%r10";
-$t1="%r11";
-@X=("%r12","%r13","%r14");
-$sp="%r15";
-
-$stdframe=16*$SIZE_T+4*8;
-$frame=$stdframe+16*4;
-
-sub Xupdate {
-my $i=shift;
-
-$code.=<<___ if ($i==15);
- lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up
- lr $X[0],$X[2]
-___
-return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
-$code.=<<___ if ($i<16);
- lg $X[0],`$i*4`($inp) ### Xload($i)
- rllg $X[1],$X[0],32
-___
-$code.=<<___ if ($i>=16);
- xgr $X[0],$prefetch ### Xupdate($i)
- lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp)
- xg $X[0],`$stdframe+4*(($i+8)%16)`($sp)
- xgr $X[0],$prefetch
- rll $X[0],$X[0],1
- rllg $X[1],$X[0],32
- rll $X[1],$X[1],1
- rllg $X[0],$X[1],32
- lr $X[2],$X[1] # feedback
-___
-$code.=<<___ if ($i<=70);
- stg $X[0],`$stdframe+4*($i%16)`($sp)
-___
-unshift(@X,pop(@X));
-}
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$d
- xr $t0,$c
- alr $e,$t1
- nr $t0,$b
- alr $e,$xi
- xr $t0,$d
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$b
- alr $e,$t1
- xr $t0,$c
- alr $e,$xi
- xr $t0,$d
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$b
- alr $e,$t1
- or $t0,$c
- lr $t1,$b
- nr $t0,$d
- nr $t1,$c
- alr $e,$xi
- or $t0,$t1
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-$code.=<<___;
-.text
-.align 64
-.type Ktable,\@object
-Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
- .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
-.size Ktable,.-Ktable
-.globl sha1_block_data_order
-.type sha1_block_data_order,\@function
-sha1_block_data_order:
-___
-$code.=<<___ if ($kimdfunc);
- larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lsoftware
- lghi %r0,0
- la %r1,`2*$SIZE_T`($sp)
- .long 0xb93e0002 # kimd %r0,%r2
- lg %r0,`2*$SIZE_T`($sp)
- tmhh %r0,`0x8000>>$kimdfunc`
- jz .Lsoftware
- lghi %r0,$kimdfunc
- lgr %r1,$ctx
- lgr %r2,$inp
- sllg %r3,$len,6
- .long 0xb93e0002 # kimd %r0,%r2
- brc 1,.-4 # pay attention to "partial completion"
- br %r14
-.align 16
-.Lsoftware:
-___
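The probe above follows the standard KIMD query protocol: facility bit 17
of the cached STFLE word flags the message-security assist, and a KIMD call
with function code 0 stores a 16-byte status block in which bit N (counted
from the most significant bit) is set when function code N is implemented;
SHA-1 is function code 1, hence the tmhh mask 0x8000>>$kimdfunc. An
illustrative Perl check over the first 64 bits of such a status block (the
names here are assumptions, not module variables):

sub kimd_has_function {
    my ($status, $fc) = @_;            # $fc == 1 selects KIMD-SHA-1
    return ($status >> (63 - $fc)) & 1;
}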
-$code.=<<___;
- lghi %r1,-$frame
- st${g} $ctx,`2*$SIZE_T`($sp)
- stm${g} %r6,%r15,`6*$SIZE_T`($sp)
- lgr %r0,$sp
- la $sp,0(%r1,$sp)
- st${g} %r0,0($sp)
-
- larl $t0,Ktable
- llgf $A,0($ctx)
- llgf $B,4($ctx)
- llgf $C,8($ctx)
- llgf $D,12($ctx)
- llgf $E,16($ctx)
-
- lg $K_00_39,0($t0)
- lg $K_40_79,8($t0)
-
-.Lloop:
- rllg $K_00_39,$K_00_39,32
-___
-for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- rllg $K_00_39,$K_00_39,32
-___
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___; $K=$K_40_79;
- rllg $K_40_79,$K_40_79,32
-___
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- rllg $K_40_79,$K_40_79,32
-___
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
-
- l${g} $ctx,`$frame+2*$SIZE_T`($sp)
- la $inp,64($inp)
- al $A,0($ctx)
- al $B,4($ctx)
- al $C,8($ctx)
- al $D,12($ctx)
- al $E,16($ctx)
- st $A,0($ctx)
- st $B,4($ctx)
- st $C,8($ctx)
- st $D,12($ctx)
- st $E,16($ctx)
- brct${g} $len,.Lloop
-
- lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
- br %r14
-.size sha1_block_data_order,.-sha1_block_data_order
-.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,16,8
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-sparcv9.pl b/app/openssl/crypto/sha/asm/sha1-sparcv9.pl
deleted file mode 100644
index 5c161cec..00000000
--- a/app/openssl/crypto/sha/asm/sha1-sparcv9.pl
+++ /dev/null
@@ -1,284 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# Performance improvement is not really impressive on pre-T1 CPUs: +8%
-# over Sun C and +25% over gcc [3.3]. On T1, a.k.a. Niagara, however,
-# it turned out to be 40% faster than 64-bit code generated by Sun C
-# 5.8 and >2x faster than 64-bit code generated by gcc 3.4. And there
-# is a gimmick: the X[16] vector is packed into 8 64-bit registers, so
-# as a result nothing is spilled on the stack. In addition, input data
-# is loaded in a compact instruction sequence, thus minimizing the
-# window in which the code is subject to the [inter-thread]
-# cache-thrashing hazard. The goal is to ensure scalability on
-# UltraSPARC T1, or rather to avoid decay when the number of active
-# threads exceeds the number of physical cores.
-
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
-
-$output=shift;
-open STDOUT,">$output";
-
-@X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7");
-$rot1m="%g2";
-$tmp64="%g3";
-$Xi="%g4";
-$A="%l0";
-$B="%l1";
-$C="%l2";
-$D="%l3";
-$E="%l4";
-@V=($A,$B,$C,$D,$E);
-$K_00_19="%l5";
-$K_20_39="%l6";
-$K_40_59="%l7";
-$K_60_79="%g5";
-@K=($K_00_19,$K_20_39,$K_40_59,$K_60_79);
-
-$ctx="%i0";
-$inp="%i1";
-$len="%i2";
-$tmp0="%i3";
-$tmp1="%i4";
-$tmp2="%i5";
-
-sub BODY_00_15 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=($i&1)?@X[($i/2)%8]:$Xi;
-
-$code.=<<___;
- sll $a,5,$tmp0 !! $i
- add @K[$i/20],$e,$e
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- and $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- andn $d,$b,$tmp1
- srl $b,2,$b
- or $tmp1,$tmp0,$tmp1
- or $tmp2,$b,$b
- add $xi,$e,$e
-___
-if ($i&1 && $i<15) {
- $code.=
- " srlx @X[(($i+1)/2)%8],32,$Xi\n";
-}
-$code.=<<___;
- add $tmp1,$e,$e
-___
-}
-
-sub Xupdate {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i/2;
-
-if ($i&1) {
-$code.=<<___;
- sll $a,5,$tmp0 !! $i
- add @K[$i/20],$e,$e
- srl $a,27,$tmp1
-___
-} else {
-$code.=<<___;
- sllx @X[($j+6)%8],32,$Xi ! Xupdate($i)
- xor @X[($j+1)%8],@X[$j%8],@X[$j%8]
- srlx @X[($j+7)%8],32,$tmp1
- xor @X[($j+4)%8],@X[$j%8],@X[$j%8]
- sll $a,5,$tmp0 !! $i
- or $tmp1,$Xi,$Xi
- add @K[$i/20],$e,$e !!
- xor $Xi,@X[$j%8],@X[$j%8]
- srlx @X[$j%8],31,$Xi
- add @X[$j%8],@X[$j%8],@X[$j%8]
- and $Xi,$rot1m,$Xi
- andn @X[$j%8],$rot1m,@X[$j%8]
- srl $a,27,$tmp1 !!
- or $Xi,@X[$j%8],@X[$j%8]
-___
-}
-}
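The tail of Xupdate above rotates *both* 32-bit schedule words packed into
one 64-bit register left by 1: shift the pair left, clear the two bits that
leaked across half boundaries with $rot1m (0x0000000100000001), and OR the
two carried-out top bits back in. A hedged Perl model of that step,
assuming a 64-bit perl (the subroutine is illustrative only):

my $rot1m = (1 << 32) | 1;             # bit 0 of each 32-bit half

sub rol1_both_halves {
    my ($x) = @_;                      # two packed 32-bit schedule words
    my $carry = ($x >> 31) & $rot1m;   # srlx/and: top bit of each half
    my $shift = ($x << 1) & ~$rot1m & 0xffffffffffffffff;  # add/andn
    return $shift | $carry;            # or
}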
-
-sub BODY_16_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-
- &Xupdate(@_);
- if ($i&1) {
- $xi=@X[($i/2)%8];
- } else {
- $xi=$Xi;
- $code.="\tsrlx @X[($i/2)%8],32,$xi\n";
- }
-$code.=<<___;
- add $tmp0,$e,$e !!
- and $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- add $xi,$e,$e
- andn $d,$b,$tmp1
- srl $b,2,$b
- or $tmp1,$tmp0,$tmp1
- or $tmp2,$b,$b
- add $tmp1,$e,$e
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi;
- &Xupdate(@_);
- if ($i&1) {
- $xi=@X[($i/2)%8];
- } else {
- $xi=$Xi;
- $code.="\tsrlx @X[($i/2)%8],32,$xi\n";
- }
-$code.=<<___;
- add $tmp0,$e,$e !!
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $xi,$e,$e
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi;
- &Xupdate(@_);
- if ($i&1) {
- $xi=@X[($i/2)%8];
- } else {
- $xi=$Xi;
- $code.="\tsrlx @X[($i/2)%8],32,$xi\n";
- }
-$code.=<<___;
- add $tmp0,$e,$e !!
- and $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- or $c,$b,$tmp1
- srl $b,2,$b
- and $d,$tmp1,$tmp1
- add $xi,$e,$e
- or $tmp1,$tmp0,$tmp1
- or $tmp2,$b,$b
- add $tmp1,$e,$e
-___
-}
-
-$code.=<<___ if ($bits==64);
-.register %g2,#scratch
-.register %g3,#scratch
-___
-$code.=<<___;
-.section ".text",#alloc,#execinstr
-
-.align 32
-.globl sha1_block_data_order
-sha1_block_data_order:
- save %sp,-$frame,%sp
- sllx $len,6,$len
- add $inp,$len,$len
-
- or %g0,1,$rot1m
- sllx $rot1m,32,$rot1m
- or $rot1m,1,$rot1m
-
- ld [$ctx+0],$A
- ld [$ctx+4],$B
- ld [$ctx+8],$C
- ld [$ctx+12],$D
- ld [$ctx+16],$E
- andn $inp,7,$tmp0
-
- sethi %hi(0x5a827999),$K_00_19
- or $K_00_19,%lo(0x5a827999),$K_00_19
- sethi %hi(0x6ed9eba1),$K_20_39
- or $K_20_39,%lo(0x6ed9eba1),$K_20_39
- sethi %hi(0x8f1bbcdc),$K_40_59
- or $K_40_59,%lo(0x8f1bbcdc),$K_40_59
- sethi %hi(0xca62c1d6),$K_60_79
- or $K_60_79,%lo(0xca62c1d6),$K_60_79
-
-.Lloop:
- ldx [$tmp0+0],@X[0]
- ldx [$tmp0+16],@X[2]
- ldx [$tmp0+32],@X[4]
- ldx [$tmp0+48],@X[6]
- and $inp,7,$tmp1
- ldx [$tmp0+8],@X[1]
- sll $tmp1,3,$tmp1
- ldx [$tmp0+24],@X[3]
- subcc %g0,$tmp1,$tmp2 ! should be 64-$tmp1, but -$tmp1 works too
- ldx [$tmp0+40],@X[5]
- bz,pt %icc,.Laligned
- ldx [$tmp0+56],@X[7]
-
- sllx @X[0],$tmp1,@X[0]
- ldx [$tmp0+64],$tmp64
-___
-for($i=0;$i<7;$i++)
-{ $code.=<<___;
- srlx @X[$i+1],$tmp2,$Xi
- sllx @X[$i+1],$tmp1,@X[$i+1]
- or $Xi,@X[$i],@X[$i]
-___
-}
-$code.=<<___;
- srlx $tmp64,$tmp2,$tmp64
- or $tmp64,@X[7],@X[7]
-.Laligned:
- srlx @X[0],32,$Xi
-___
-for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
-for (;$i<20;$i++) { &BODY_16_19($i,@V); unshift(@V,pop(@V)); }
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
-
- ld [$ctx+0],@X[0]
- ld [$ctx+4],@X[1]
- ld [$ctx+8],@X[2]
- ld [$ctx+12],@X[3]
- add $inp,64,$inp
- ld [$ctx+16],@X[4]
- cmp $inp,$len
-
- add $A,@X[0],$A
- st $A,[$ctx+0]
- add $B,@X[1],$B
- st $B,[$ctx+4]
- add $C,@X[2],$C
- st $C,[$ctx+8]
- add $D,@X[3],$D
- st $D,[$ctx+12]
- add $E,@X[4],$E
- st $E,[$ctx+16]
-
- bne `$bits==64?"%xcc":"%icc"`,.Lloop
- andn $inp,7,$tmp0
-
- ret
- restore
-.type sha1_block_data_order,#function
-.size sha1_block_data_order,(.-sha1_block_data_order)
-.asciz "SHA1 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
-.align 4
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl b/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl
deleted file mode 100644
index e65291bb..00000000
--- a/app/openssl/crypto/sha/asm/sha1-sparcv9a.pl
+++ /dev/null
@@ -1,601 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# January 2009
-#
-# Provided that UltraSPARC VIS instructions are pipe-lined(*) and
-# pairable(*) with IALU ones, offloading of Xupdate to the UltraSPARC
-# Graphic Unit would make it possible to achieve higher instruction-
-# level parallelism, ILP, and thus higher performance. It should be
-# explicitly noted that ILP is the keyword, and it means that this
-# code would be unsuitable for cores like UltraSPARC-Tx. The idea is
-# not really novel; Sun had a VIS-powered implementation for a while.
-# Unlike Sun's implementation this one can process multiple unaligned
-# input blocks, and as such works as a drop-in replacement for OpenSSL
-# sha1_block_data_order. Performance improvement was measured to be
-# 40% over the pure-IALU sha1-sparcv9.pl on UltraSPARC-IIi, but 12% on
-# UltraSPARC-III. See below for discussion...
-#
-# The module does not present direct interest for OpenSSL, because
-# it doesn't provide better performance on contemporary SPARCv9 CPUs,
-# UltraSPARC-Tx and SPARC64-V[II] to be specific. Those who feel they
-# absolutely must score on UltraSPARC-I-IV can simply replace
-# crypto/sha/asm/sha1-sparcv9.pl with this module.
-#
-# (*) "Pipe-lined" means that even if it takes several cycles to
-#     complete, the next instruction using the same functional unit
-#     [but not depending on the result of the current instruction]
-#     can start execution without having to wait for the unit.
-#     "Pairable" means that two [or more] independent instructions
-#     can be issued at the very same time.
-
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
-
-$output=shift;
-open STDOUT,">$output";
-
-$ctx="%i0";
-$inp="%i1";
-$len="%i2";
-$tmp0="%i3";
-$tmp1="%i4";
-$tmp2="%i5";
-$tmp3="%g5";
-
-$base="%g1";
-$align="%g4";
-$Xfer="%o5";
-$nXfer=$tmp3;
-$Xi="%o7";
-
-$A="%l0";
-$B="%l1";
-$C="%l2";
-$D="%l3";
-$E="%l4";
-@V=($A,$B,$C,$D,$E);
-
-$Actx="%o0";
-$Bctx="%o1";
-$Cctx="%o2";
-$Dctx="%o3";
-$Ectx="%o4";
-
-$fmul="%f32";
-$VK_00_19="%f34";
-$VK_20_39="%f36";
-$VK_40_59="%f38";
-$VK_60_79="%f40";
-@VK=($VK_00_19,$VK_20_39,$VK_40_59,$VK_60_79);
-@X=("%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
- "%f8", "%f9","%f10","%f11","%f12","%f13","%f14","%f15","%f16");
-
-# This is the reference 2x-parallelized VIS-powered Xupdate procedure.
-# It even covers the K_NN_MM addition...
-sub Xupdate {
-my ($i)=@_;
-my $K=@VK[($i+16)/20];
-my $j=($i+16)%16;
-
-# [ provided that GSR.alignaddr_offset is 5, $fmul contains the
-# 0x100ULL<<32|0x100 value and K_NN_MM are pre-loaded to the
-# chosen registers... ]
-$code.=<<___;
- fxors @X[($j+13)%16],@X[$j],@X[$j] !-1/-1/-1:X[0]^=X[13]
- fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14]
- fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9]
- fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9]
- faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24
- fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1
- fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1
- ![fxors %f15,%f2,%f2]
- for %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp
- ![fxors %f0,%f3,%f3] !10/17/12:X[0] dependency
- fpadd32 $K,@X[$j],%f20
- std %f20,[$Xfer+`4*$j`]
-___
-# The numbers delimited with slashes are the earliest possible dispatch
-# cycles for a given instruction, assuming 1-cycle latency for simple
-# VIS instructions (such as on UltraSPARC-I&II), 3-cycle latency (such
-# as on UltraSPARC-III&IV), and 2-cycle latency(*), respectively. Being
-# 2x-parallelized the procedure is "worth" 5, 8.5 or 6 ticks per SHA1
-# round. As [long as] FPU/VIS instructions are perfectly pairable with
-# IALU ones, the round timing is defined by the maximum between VIS
-# and IALU timings. The latter varies from round to round and averages
-# out at 6.25 ticks. This means that USI&II should operate at IALU
-# rate, while USIII&IV - at VIS rate. This explains why the
-# performance improvement varies among processors, given that the
-# pure-IALU sha1-sparcv9.pl module exhibits virtually uniform
-# performance of ~9.3 cycles per SHA1 round. The timings mentioned
-# above are theoretical
-# lower limits. Real-life performance was measured to be 6.6 cycles
-# per SHA1 round on USIIi and 8.3 on USIII. The latter is lower than
-# half-round VIS timing, because there are 16 Xupdate-free rounds,
-# which "push down" average theoretical timing to 8 cycles...
-
-# (*) SPARC64-V[II] was originally believed to have 2-cycle VIS
-#     latency. Well, it might have, but it doesn't have a dedicated
-#     VIS unit. Instead, VIS instructions are executed by other
-#     functional units - the ones used here, by the IALU. This
-#     doesn't improve effective ILP...
-}
-
-# The reference Xupdate procedure is then "strained" over *pairs* of
-# BODY_NN_MM and kind of modulo-scheduled in respect to X[n]^=X[n+13]
-# and K_NN_MM addition. It's "running" 15 rounds ahead, which leaves
-# plenty of room to amortize for read-after-write hazard, as well as
-# to fetch and align input for the next spin. The VIS instructions are
-# scheduled for latency of 2 cycles, because there are not enough IALU
-# instructions to schedule for latency of 3, while scheduling for 1
-# would give no gain on USI&II anyway.
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i&~1;
-my $k=($j+16+2)%16; # ahead reference
-my $l=($j+16-2)%16; # behind reference
-my $K=@VK[($j+16-2)/20];
-
-$j=($j+16)%16;
-
-$code.=<<___ if (!($i&1));
- sll $a,5,$tmp0 !! $i
- and $c,$b,$tmp3
- ld [$Xfer+`4*($i%16)`],$Xi
- fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14]
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9]
- sll $b,30,$tmp2
- add $tmp1,$e,$e
- andn $d,$b,$tmp1
- add $Xi,$e,$e
- fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9]
- srl $b,2,$b
- or $tmp1,$tmp3,$tmp1
- or $tmp2,$b,$b
- add $tmp1,$e,$e
- faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24
-___
-$code.=<<___ if ($i&1);
- sll $a,5,$tmp0 !! $i
- and $c,$b,$tmp3
- ld [$Xfer+`4*($i%16)`],$Xi
- fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1
- sll $b,30,$tmp2
- add $tmp1,$e,$e
- fpadd32 $K,@X[$l],%f20 !
- andn $d,$b,$tmp1
- add $Xi,$e,$e
- fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13]
- srl $b,2,$b
- or $tmp1,$tmp3,$tmp1
- fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp
- or $tmp2,$b,$b
- add $tmp1,$e,$e
-___
-$code.=<<___ if ($i&1 && $i>=2);
- std %f20,[$Xfer+`4*$l`] !
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i&~1;
-my $k=($j+16+2)%16; # ahead reference
-my $l=($j+16-2)%16; # behind reference
-my $K=@VK[($j+16-2)/20];
-
-$j=($j+16)%16;
-
-$code.=<<___ if (!($i&1) && $i<64);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14]
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9]
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9]
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
- faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24
-___
-$code.=<<___ if ($i&1 && $i<64);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- fpadd32 $K,@X[$l],%f20 !
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13]
- srl $b,2,$b
- add $tmp1,$e,$e
- fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp
- or $tmp2,$b,$b
- add $Xi,$e,$e
- std %f20,[$Xfer+`4*$l`] !
-___
-$code.=<<___ if ($i==64);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- fpadd32 $K,@X[$l],%f20
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- std %f20,[$Xfer+`4*$l`]
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
-___
-$code.=<<___ if ($i>64);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i&~1;
-my $k=($j+16+2)%16; # ahead reference
-my $l=($j+16-2)%16; # behind reference
-my $K=@VK[($j+16-2)/20];
-
-$j=($j+16)%16;
-
-$code.=<<___ if (!($i&1));
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14]
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9]
- and $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- or $c,$b,$tmp1
- fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9]
- srl $b,2,$b
- and $d,$tmp1,$tmp1
- add $Xi,$e,$e
- or $tmp1,$tmp0,$tmp1
- faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24
- or $tmp2,$b,$b
- add $tmp1,$e,$e
- fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1
-___
-$code.=<<___ if ($i&1);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1
- and $c,$b,$tmp0
- add $tmp1,$e,$e
- fpadd32 $K,@X[$l],%f20 !
- sll $b,30,$tmp2
- or $c,$b,$tmp1
- fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13]
- srl $b,2,$b
- and $d,$tmp1,$tmp1
- fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp
- add $Xi,$e,$e
- or $tmp1,$tmp0,$tmp1
- or $tmp2,$b,$b
- add $tmp1,$e,$e
- std %f20,[$Xfer+`4*$l`] !
-___
-}
-
-# If there is more data to process, then we pre-fetch the data for the
-# next iteration in the last ten rounds...
-sub BODY_70_79 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i&~1;
-my $m=($i%8)*2;
-
-$j=($j+16)%16;
-
-$code.=<<___ if ($i==70);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- ldd [$inp+64],@X[0]
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
-
- and $inp,-64,$nXfer
- inc 64,$inp
- and $nXfer,255,$nXfer
- alignaddr %g0,$align,%g0
- add $base,$nXfer,$nXfer
-___
-$code.=<<___ if ($i==71);
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
-___
-$code.=<<___ if ($i>=72);
- faligndata @X[$m],@X[$m+2],@X[$m]
- sll $a,5,$tmp0 !! $i
- ld [$Xfer+`4*($i%16)`],$Xi
- srl $a,27,$tmp1
- add $tmp0,$e,$e
- xor $c,$b,$tmp0
- add $tmp1,$e,$e
- fpadd32 $VK_00_19,@X[$m],%f20
- sll $b,30,$tmp2
- xor $d,$tmp0,$tmp1
- srl $b,2,$b
- add $tmp1,$e,$e
- or $tmp2,$b,$b
- add $Xi,$e,$e
-___
-$code.=<<___ if ($i<77);
- ldd [$inp+`8*($i+1-70)`],@X[2*($i+1-70)]
-___
-$code.=<<___ if ($i==77); # redundant if $inp was aligned
- add $align,63,$tmp0
- and $tmp0,-8,$tmp0
- ldd [$inp+$tmp0],@X[16]
-___
-$code.=<<___ if ($i>=72);
- std %f20,[$nXfer+`4*$m`]
-___
-}
-
-$code.=<<___;
-.section ".text",#alloc,#execinstr
-
-.align 64
-vis_const:
-.long 0x5a827999,0x5a827999 ! K_00_19
-.long 0x6ed9eba1,0x6ed9eba1 ! K_20_39
-.long 0x8f1bbcdc,0x8f1bbcdc ! K_40_59
-.long 0xca62c1d6,0xca62c1d6 ! K_60_79
-.long 0x00000100,0x00000100
-.align 64
-.type vis_const,#object
-.size vis_const,(.-vis_const)
-
-.globl sha1_block_data_order
-sha1_block_data_order:
- save %sp,-$frame,%sp
- add %fp,$bias-256,$base
-
-1: call .+8
- add %o7,vis_const-1b,$tmp0
-
- ldd [$tmp0+0],$VK_00_19
- ldd [$tmp0+8],$VK_20_39
- ldd [$tmp0+16],$VK_40_59
- ldd [$tmp0+24],$VK_60_79
- ldd [$tmp0+32],$fmul
-
- ld [$ctx+0],$Actx
- and $base,-256,$base
- ld [$ctx+4],$Bctx
- sub $base,$bias+$frame,%sp
- ld [$ctx+8],$Cctx
- and $inp,7,$align
- ld [$ctx+12],$Dctx
- and $inp,-8,$inp
- ld [$ctx+16],$Ectx
-
- ! X[16] is maintained in FP register bank
- alignaddr %g0,$align,%g0
- ldd [$inp+0],@X[0]
- sub $inp,-64,$Xfer
- ldd [$inp+8],@X[2]
- and $Xfer,-64,$Xfer
- ldd [$inp+16],@X[4]
- and $Xfer,255,$Xfer
- ldd [$inp+24],@X[6]
- add $base,$Xfer,$Xfer
- ldd [$inp+32],@X[8]
- ldd [$inp+40],@X[10]
- ldd [$inp+48],@X[12]
- brz,pt $align,.Laligned
- ldd [$inp+56],@X[14]
-
- ldd [$inp+64],@X[16]
- faligndata @X[0],@X[2],@X[0]
- faligndata @X[2],@X[4],@X[2]
- faligndata @X[4],@X[6],@X[4]
- faligndata @X[6],@X[8],@X[6]
- faligndata @X[8],@X[10],@X[8]
- faligndata @X[10],@X[12],@X[10]
- faligndata @X[12],@X[14],@X[12]
- faligndata @X[14],@X[16],@X[14]
-
-.Laligned:
- mov 5,$tmp0
- dec 1,$len
- alignaddr %g0,$tmp0,%g0
- fpadd32 $VK_00_19,@X[0],%f16
- fpadd32 $VK_00_19,@X[2],%f18
- fpadd32 $VK_00_19,@X[4],%f20
- fpadd32 $VK_00_19,@X[6],%f22
- fpadd32 $VK_00_19,@X[8],%f24
- fpadd32 $VK_00_19,@X[10],%f26
- fpadd32 $VK_00_19,@X[12],%f28
- fpadd32 $VK_00_19,@X[14],%f30
- std %f16,[$Xfer+0]
- mov $Actx,$A
- std %f18,[$Xfer+8]
- mov $Bctx,$B
- std %f20,[$Xfer+16]
- mov $Cctx,$C
- std %f22,[$Xfer+24]
- mov $Dctx,$D
- std %f24,[$Xfer+32]
- mov $Ectx,$E
- std %f26,[$Xfer+40]
- fxors @X[13],@X[0],@X[0]
- std %f28,[$Xfer+48]
- ba .Loop
- std %f30,[$Xfer+56]
-.align 32
-.Loop:
-___
-for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-for (;$i<70;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- tst $len
- bz,pn `$bits==32?"%icc":"%xcc"`,.Ltail
- nop
-___
-for (;$i<80;$i++) { &BODY_70_79($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- add $A,$Actx,$Actx
- add $B,$Bctx,$Bctx
- add $C,$Cctx,$Cctx
- add $D,$Dctx,$Dctx
- add $E,$Ectx,$Ectx
- mov 5,$tmp0
- fxors @X[13],@X[0],@X[0]
- mov $Actx,$A
- mov $Bctx,$B
- mov $Cctx,$C
- mov $Dctx,$D
- mov $Ectx,$E
- alignaddr %g0,$tmp0,%g0
- dec 1,$len
- ba .Loop
- mov $nXfer,$Xfer
-
-.align 32
-.Ltail:
-___
-for($i=70;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- add $A,$Actx,$Actx
- add $B,$Bctx,$Bctx
- add $C,$Cctx,$Cctx
- add $D,$Dctx,$Dctx
- add $E,$Ectx,$Ectx
-
- st $Actx,[$ctx+0]
- st $Bctx,[$ctx+4]
- st $Cctx,[$ctx+8]
- st $Dctx,[$ctx+12]
- st $Ectx,[$ctx+16]
-
- ret
- restore
-.type sha1_block_data_order,#function
-.size sha1_block_data_order,(.-sha1_block_data_order)
-.asciz "SHA1 block transform for SPARCv9a, CRYPTOGAMS by <appro\@openssl.org>"
-.align 4
-___
-
-# The purpose of these subroutines is to explicitly encode VIS
-# instructions, so that one can compile the module without having to
-# specify VIS extensions on the compiler command line, e.g. -xarch=v9
-# vs. -xarch=v9a. The idea is to preserve the option of producing a
-# "universal" binary and let the programmer detect at run-time whether
-# the current CPU is VIS-capable.
-sub unvis {
-my ($mnemonic,$rs1,$rs2,$rd)=@_;
-my ($ref,$opf);
-my %visopf = ( "fmul8ulx16" => 0x037,
- "faligndata" => 0x048,
- "fpadd32" => 0x052,
- "fxor" => 0x06c,
- "fxors" => 0x06d );
-
- $ref = "$mnemonic\t$rs1,$rs2,$rd";
-
- if ($opf=$visopf{$mnemonic}) {
- foreach ($rs1,$rs2,$rd) {
- return $ref if (!/%f([0-9]{1,2})/);
- $_=$1;
- if ($1>=32) {
- return $ref if ($1&1);
- # re-encode for upper double register addressing
- $_=($1|$1>>5)&31;
- }
- }
-
- return sprintf ".word\t0x%08x !%s",
- 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
- $ref;
- } else {
- return $ref;
- }
-}
-sub unalignaddr {
-my ($mnemonic,$rs1,$rs2,$rd)=@_;
-my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
-my $ref="$mnemonic\t$rs1,$rs2,$rd";
-
- foreach ($rs1,$rs2,$rd) {
- if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; }
- else { return $ref; }
- }
- return sprintf ".word\t0x%08x !%s",
- 0x81b00300|$rd<<25|$rs1<<14|$rs2,
- $ref;
-}
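For illustration, here is what unvis above yields for one instruction used
in this module, with the field layout 0x81b00000|rd<<25|rs1<<14|opf<<5|rs2
worked out by hand (the encoded word is my own computation rather than
captured output, so treat it as a sketch):

print unvis("fpadd32", "%f16", "%f18", "%f20"), "\n";
# expected: .word 0xa9b40a52 !fpadd32 %f16,%f18,%f20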
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),(%f[0-9]{1,2}),(%f[0-9]{1,2})/
- &unvis($1,$2,$3,$4)
- /gem;
-$code =~ s/\b(alignaddr)\s+(%[goli][0-7]),(%[goli][0-7]),(%[goli][0-7])/
- &unalignaddr($1,$2,$3,$4)
- /gem;
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha1-thumb.pl b/app/openssl/crypto/sha/asm/sha1-thumb.pl
deleted file mode 100644
index 7c9ea9b0..00000000
--- a/app/openssl/crypto/sha/asm/sha1-thumb.pl
+++ /dev/null
@@ -1,259 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# sha1_block for Thumb.
-#
-# January 2007.
-#
-# The code does not present direct interest to OpenSSL, because of its
-# low performance. Its purpose is to establish a _size_ benchmark. A
-# pretty useless one, I must say, because the 30% or 88 bytes larger
-# ARMv4 code [available on demand] is almost _twice_ as fast. It
-# should also be noted that in-lining .Lcommon and .Lrotate improves
-# performance by over 40%, while code size grows by only 10% or 32
-# bytes. But once again, the goal was to establish a _size_ benchmark,
-# not performance.
-
-$output=shift;
-open STDOUT,">$output";
-
-$inline=0;
-#$cheat_on_binutils=1;
-
-$t0="r0";
-$t1="r1";
-$t2="r2";
-$a="r3";
-$b="r4";
-$c="r5";
-$d="r6";
-$e="r7";
-$K="r8"; # "upper" registers can be used in add/sub and mov insns
-$ctx="r9";
-$inp="r10";
-$len="r11";
-$Xi="r12";
-
-sub common {
-<<___;
- sub $t0,#4
- ldr $t1,[$t0]
- add $e,$K @ E+=K_xx_xx
- lsl $t2,$a,#5
- add $t2,$e
- lsr $e,$a,#27
- add $t2,$e @ E+=ROR(A,27)
- add $t2,$t1 @ E+=X[i]
-___
-}
-sub rotate {
-<<___;
- mov $e,$d @ E=D
- mov $d,$c @ D=C
- lsl $c,$b,#30
- lsr $b,$b,#2
- orr $c,$b @ C=ROR(B,2)
- mov $b,$a @ B=A
- add $a,$t2,$t1 @ A=E+F_xx_xx(B,C,D)
-___
-}
-
-sub BODY_00_19 {
-$code.=$inline?&common():"\tbl .Lcommon\n";
-$code.=<<___;
- mov $t1,$c
- eor $t1,$d
- and $t1,$b
- eor $t1,$d @ F_00_19(B,C,D)
-___
-$code.=$inline?&rotate():"\tbl .Lrotate\n";
-}
-
-sub BODY_20_39 {
-$code.=$inline?&common():"\tbl .Lcommon\n";
-$code.=<<___;
- mov $t1,$b
- eor $t1,$c
- eor $t1,$d @ F_20_39(B,C,D)
-___
-$code.=$inline?&rotate():"\tbl .Lrotate\n";
-}
-
-sub BODY_40_59 {
-$code.=$inline?&common():"\tbl .Lcommon\n";
-$code.=<<___;
- mov $t1,$b
- and $t1,$c
- mov $e,$b
- orr $e,$c
- and $e,$d
- orr $t1,$e @ F_40_59(B,C,D)
-___
-$code.=$inline?&rotate():"\tbl .Lrotate\n";
-}
-
-$code=<<___;
-.text
-.code 16
-
-.global sha1_block_data_order
-.type sha1_block_data_order,%function
-
-.align 2
-sha1_block_data_order:
-___
-if ($cheat_on_binutils) {
-$code.=<<___;
-.code 32
- add r3,pc,#1
- bx r3 @ switch to Thumb ISA
-.code 16
-___
-}
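The $cheat_on_binutils stub above leans on the ARM interworking rule that
bit 0 of a bx target selects the instruction set: in ARM state pc reads two
instructions ahead, so pc+1 is an odd address pointing just past the bx,
and the branch lands there in Thumb state. A throwaway illustration (the
addresses are made up):

my $pc     = 0x8000;        # address of the "add r3,pc,#1" itself (example)
my $target = ($pc + 8) + 1; # ARM-state pc reads 8 ahead; +1 requests Thumb
printf "bx to 0x%x (bit 0 = %d => Thumb state)\n", $target, $target & 1;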
-$code.=<<___;
- push {r4-r7}
- mov r3,r8
- mov r4,r9
- mov r5,r10
- mov r6,r11
- mov r7,r12
- push {r3-r7,lr}
- lsl r2,#6
- mov $ctx,r0 @ save context
- mov $inp,r1 @ save inp
- mov $len,r2 @ save len
- add $len,$inp @ $len to point at inp end
-
-.Lloop:
- mov $Xi,sp
- mov $t2,sp
- sub $t2,#16*4 @ [3]
-.LXload:
- ldrb $a,[$t1,#0] @ $t1 is r1 and holds inp
- ldrb $b,[$t1,#1]
- ldrb $c,[$t1,#2]
- ldrb $d,[$t1,#3]
- lsl $a,#24
- lsl $b,#16
- lsl $c,#8
- orr $a,$b
- orr $a,$c
- orr $a,$d
- add $t1,#4
- push {$a}
- cmp sp,$t2
- bne .LXload @ [+14*16]
-
- mov $inp,$t1 @ update $inp
- sub $t2,#32*4
- sub $t2,#32*4
- mov $e,#31 @ [+4]
-.LXupdate:
- ldr $a,[sp,#15*4]
- ldr $b,[sp,#13*4]
- ldr $c,[sp,#7*4]
- ldr $d,[sp,#2*4]
- eor $a,$b
- eor $a,$c
- eor $a,$d
- ror $a,$e
- push {$a}
- cmp sp,$t2
- bne .LXupdate @ [+(11+1)*64]
-
- ldmia $t0!,{$a,$b,$c,$d,$e} @ $t0 is r0 and holds ctx
- mov $t0,$Xi
-
- ldr $t2,.LK_00_19
- mov $t1,$t0
- sub $t1,#20*4
- mov $Xi,$t1
- mov $K,$t2 @ [+7+4]
-.L_00_19:
-___
- &BODY_00_19();
-$code.=<<___;
- cmp $Xi,$t0
- bne .L_00_19 @ [+(2+9+4+2+8+2)*20]
-
- ldr $t2,.LK_20_39
- mov $t1,$t0
- sub $t1,#20*4
- mov $Xi,$t1
- mov $K,$t2 @ [+5]
-.L_20_39_or_60_79:
-___
- &BODY_20_39();
-$code.=<<___;
- cmp $Xi,$t0
- bne .L_20_39_or_60_79 @ [+(2+9+3+2+8+2)*20*2]
- cmp sp,$t0
- beq .Ldone @ [+2]
-
- ldr $t2,.LK_40_59
- mov $t1,$t0
- sub $t1,#20*4
- mov $Xi,$t1
- mov $K,$t2 @ [+5]
-.L_40_59:
-___
- &BODY_40_59();
-$code.=<<___;
- cmp $Xi,$t0
- bne .L_40_59 @ [+(2+9+6+2+8+2)*20]
-
- ldr $t2,.LK_60_79
- mov $Xi,sp
- mov $K,$t2
- b .L_20_39_or_60_79 @ [+4]
-.Ldone:
- mov $t0,$ctx
- ldr $t1,[$t0,#0]
- ldr $t2,[$t0,#4]
- add $a,$t1
- ldr $t1,[$t0,#8]
- add $b,$t2
- ldr $t2,[$t0,#12]
- add $c,$t1
- ldr $t1,[$t0,#16]
- add $d,$t2
- add $e,$t1
- stmia $t0!,{$a,$b,$c,$d,$e} @ [+20]
-
- add sp,#80*4 @ deallocate stack frame
- mov $t0,$ctx @ restore ctx
- mov $t1,$inp @ restore inp
- cmp $t1,$len
- beq .Lexit
- b .Lloop @ [+6] total 3212 cycles
-.Lexit:
- pop {r2-r7}
- mov r8,r2
- mov r9,r3
- mov r10,r4
- mov r11,r5
- mov r12,r6
- mov lr,r7
- pop {r4-r7}
- bx lr
-.align 2
-___
-$code.=".Lcommon:\n".&common()."\tmov pc,lr\n" if (!$inline);
-$code.=".Lrotate:\n".&rotate()."\tmov pc,lr\n" if (!$inline);
-$code.=<<___;
-.align 2
-.LK_00_19: .word 0x5a827999
-.LK_20_39: .word 0x6ed9eba1
-.LK_40_59: .word 0x8f1bbcdc
-.LK_60_79: .word 0xca62c1d6
-.size sha1_block_data_order,.-sha1_block_data_order
-.asciz "SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
-___
-
-print $code;
-close STDOUT; # enforce flush
diff --git a/app/openssl/crypto/sha/asm/sha1-x86_64.S b/app/openssl/crypto/sha/asm/sha1-x86_64.S
deleted file mode 100644
index 3922e203..00000000
--- a/app/openssl/crypto/sha/asm/sha1-x86_64.S
+++ /dev/null
@@ -1,2486 +0,0 @@
-.text
-
-
-.globl sha1_block_data_order
-.type sha1_block_data_order,@function
-.align 16
-sha1_block_data_order:
- movl OPENSSL_ia32cap_P+0(%rip),%r9d
- movl OPENSSL_ia32cap_P+4(%rip),%r8d
- testl $512,%r8d
- jz .Lialu
- jmp _ssse3_shortcut
-
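The prologue above dispatches on cached CPUID bits: OPENSSL_ia32cap_P+4
holds the CPUID.1 ECX word, and the mask 512 (1<<9) is the SSSE3 feature
bit, so SSSE3-capable parts take the vectorized path. Expressed as a
throwaway Perl predicate (the name is illustrative):

sub wants_ssse3_path { my ($ecx) = @_; ($ecx & 512) != 0 }   # 512 == 1<<9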
-.align 16
-.Lialu:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- movq %rsp,%r11
- movq %rdi,%r8
- subq $72,%rsp
- movq %rsi,%r9
- andq $-64,%rsp
- movq %rdx,%r10
- movq %r11,64(%rsp)
-.Lprologue:
-
- movl 0(%r8),%esi
- movl 4(%r8),%edi
- movl 8(%r8),%r11d
- movl 12(%r8),%r12d
- movl 16(%r8),%r13d
- jmp .Lloop
-
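The signed decimal constants folded into the leal instructions below are
simply the four SHA-1 round constants printed in decimal; the two with the
high bit set come out negative as signed 32-bit immediates. A quick Perl
cross-check (K_60_79 is listed for completeness):

use strict; use warnings;
my %K = (
    0x5a827999 => "K_00_19",   # leal 1518500249(...)
    0x6ed9eba1 => "K_20_39",   # leal 1859775393(...)
    0x8f1bbcdc => "K_40_59",   # leal -1894007588(...): wraps negative
    0xca62c1d6 => "K_60_79",   # likewise negative as a signed immediate
);
printf "%s = 0x%08x -> leal constant %d\n",
    $K{$_}, $_, unpack("l", pack("L", $_))
    for sort { $a <=> $b } keys %K;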
-.align 16
-.Lloop:
- movl 0(%r9),%edx
- bswapl %edx
- movl %edx,0(%rsp)
- movl %r11d,%eax
- movl 4(%r9),%ebp
- movl %esi,%ecx
- xorl %r12d,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%r13,1),%r13d
- andl %edi,%eax
- movl %ebp,4(%rsp)
- addl %ecx,%r13d
- xorl %r12d,%eax
- roll $30,%edi
- addl %eax,%r13d
- movl %edi,%eax
- movl 8(%r9),%edx
- movl %r13d,%ecx
- xorl %r11d,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%r12,1),%r12d
- andl %esi,%eax
- movl %edx,8(%rsp)
- addl %ecx,%r12d
- xorl %r11d,%eax
- roll $30,%esi
- addl %eax,%r12d
- movl %esi,%eax
- movl 12(%r9),%ebp
- movl %r12d,%ecx
- xorl %edi,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%r11,1),%r11d
- andl %r13d,%eax
- movl %ebp,12(%rsp)
- addl %ecx,%r11d
- xorl %edi,%eax
- roll $30,%r13d
- addl %eax,%r11d
- movl %r13d,%eax
- movl 16(%r9),%edx
- movl %r11d,%ecx
- xorl %esi,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%rdi,1),%edi
- andl %r12d,%eax
- movl %edx,16(%rsp)
- addl %ecx,%edi
- xorl %esi,%eax
- roll $30,%r12d
- addl %eax,%edi
- movl %r12d,%eax
- movl 20(%r9),%ebp
- movl %edi,%ecx
- xorl %r13d,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%rsi,1),%esi
- andl %r11d,%eax
- movl %ebp,20(%rsp)
- addl %ecx,%esi
- xorl %r13d,%eax
- roll $30,%r11d
- addl %eax,%esi
- movl %r11d,%eax
- movl 24(%r9),%edx
- movl %esi,%ecx
- xorl %r12d,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%r13,1),%r13d
- andl %edi,%eax
- movl %edx,24(%rsp)
- addl %ecx,%r13d
- xorl %r12d,%eax
- roll $30,%edi
- addl %eax,%r13d
- movl %edi,%eax
- movl 28(%r9),%ebp
- movl %r13d,%ecx
- xorl %r11d,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%r12,1),%r12d
- andl %esi,%eax
- movl %ebp,28(%rsp)
- addl %ecx,%r12d
- xorl %r11d,%eax
- roll $30,%esi
- addl %eax,%r12d
- movl %esi,%eax
- movl 32(%r9),%edx
- movl %r12d,%ecx
- xorl %edi,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%r11,1),%r11d
- andl %r13d,%eax
- movl %edx,32(%rsp)
- addl %ecx,%r11d
- xorl %edi,%eax
- roll $30,%r13d
- addl %eax,%r11d
- movl %r13d,%eax
- movl 36(%r9),%ebp
- movl %r11d,%ecx
- xorl %esi,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%rdi,1),%edi
- andl %r12d,%eax
- movl %ebp,36(%rsp)
- addl %ecx,%edi
- xorl %esi,%eax
- roll $30,%r12d
- addl %eax,%edi
- movl %r12d,%eax
- movl 40(%r9),%edx
- movl %edi,%ecx
- xorl %r13d,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%rsi,1),%esi
- andl %r11d,%eax
- movl %edx,40(%rsp)
- addl %ecx,%esi
- xorl %r13d,%eax
- roll $30,%r11d
- addl %eax,%esi
- movl %r11d,%eax
- movl 44(%r9),%ebp
- movl %esi,%ecx
- xorl %r12d,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%r13,1),%r13d
- andl %edi,%eax
- movl %ebp,44(%rsp)
- addl %ecx,%r13d
- xorl %r12d,%eax
- roll $30,%edi
- addl %eax,%r13d
- movl %edi,%eax
- movl 48(%r9),%edx
- movl %r13d,%ecx
- xorl %r11d,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%r12,1),%r12d
- andl %esi,%eax
- movl %edx,48(%rsp)
- addl %ecx,%r12d
- xorl %r11d,%eax
- roll $30,%esi
- addl %eax,%r12d
- movl %esi,%eax
- movl 52(%r9),%ebp
- movl %r12d,%ecx
- xorl %edi,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%r11,1),%r11d
- andl %r13d,%eax
- movl %ebp,52(%rsp)
- addl %ecx,%r11d
- xorl %edi,%eax
- roll $30,%r13d
- addl %eax,%r11d
- movl %r13d,%eax
- movl 56(%r9),%edx
- movl %r11d,%ecx
- xorl %esi,%eax
- bswapl %edx
- roll $5,%ecx
- leal 1518500249(%rbp,%rdi,1),%edi
- andl %r12d,%eax
- movl %edx,56(%rsp)
- addl %ecx,%edi
- xorl %esi,%eax
- roll $30,%r12d
- addl %eax,%edi
- movl %r12d,%eax
- movl 60(%r9),%ebp
- movl %edi,%ecx
- xorl %r13d,%eax
- bswapl %ebp
- roll $5,%ecx
- leal 1518500249(%rdx,%rsi,1),%esi
- andl %r11d,%eax
- movl %ebp,60(%rsp)
- addl %ecx,%esi
- xorl %r13d,%eax
- roll $30,%r11d
- addl %eax,%esi
- movl 0(%rsp),%edx
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 8(%rsp),%edx
- xorl %r12d,%eax
- roll $5,%ecx
- xorl 32(%rsp),%edx
- andl %edi,%eax
- leal 1518500249(%rbp,%r13,1),%r13d
- xorl 52(%rsp),%edx
- xorl %r12d,%eax
- roll $1,%edx
- addl %ecx,%r13d
- roll $30,%edi
- movl %edx,0(%rsp)
- addl %eax,%r13d
- movl 4(%rsp),%ebp
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 12(%rsp),%ebp
- xorl %r11d,%eax
- roll $5,%ecx
- xorl 36(%rsp),%ebp
- andl %esi,%eax
- leal 1518500249(%rdx,%r12,1),%r12d
- xorl 56(%rsp),%ebp
- xorl %r11d,%eax
- roll $1,%ebp
- addl %ecx,%r12d
- roll $30,%esi
- movl %ebp,4(%rsp)
- addl %eax,%r12d
- movl 8(%rsp),%edx
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 16(%rsp),%edx
- xorl %edi,%eax
- roll $5,%ecx
- xorl 40(%rsp),%edx
- andl %r13d,%eax
- leal 1518500249(%rbp,%r11,1),%r11d
- xorl 60(%rsp),%edx
- xorl %edi,%eax
- roll $1,%edx
- addl %ecx,%r11d
- roll $30,%r13d
- movl %edx,8(%rsp)
- addl %eax,%r11d
- movl 12(%rsp),%ebp
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 20(%rsp),%ebp
- xorl %esi,%eax
- roll $5,%ecx
- xorl 44(%rsp),%ebp
- andl %r12d,%eax
- leal 1518500249(%rdx,%rdi,1),%edi
- xorl 0(%rsp),%ebp
- xorl %esi,%eax
- roll $1,%ebp
- addl %ecx,%edi
- roll $30,%r12d
- movl %ebp,12(%rsp)
- addl %eax,%edi
- movl 16(%rsp),%edx
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 24(%rsp),%edx
- xorl %r13d,%eax
- roll $5,%ecx
- xorl 48(%rsp),%edx
- andl %r11d,%eax
- leal 1518500249(%rbp,%rsi,1),%esi
- xorl 4(%rsp),%edx
- xorl %r13d,%eax
- roll $1,%edx
- addl %ecx,%esi
- roll $30,%r11d
- movl %edx,16(%rsp)
- addl %eax,%esi
- movl 20(%rsp),%ebp
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 28(%rsp),%ebp
- xorl %edi,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r13,1),%r13d
- xorl 52(%rsp),%ebp
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 8(%rsp),%ebp
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%ebp
- movl %ebp,20(%rsp)
- movl 24(%rsp),%edx
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 32(%rsp),%edx
- xorl %esi,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r12,1),%r12d
- xorl 56(%rsp),%edx
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 12(%rsp),%edx
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%edx
- movl %edx,24(%rsp)
- movl 28(%rsp),%ebp
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 36(%rsp),%ebp
- xorl %r13d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r11,1),%r11d
- xorl 60(%rsp),%ebp
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 16(%rsp),%ebp
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%ebp
- movl %ebp,28(%rsp)
- movl 32(%rsp),%edx
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 40(%rsp),%edx
- xorl %r12d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%rdi,1),%edi
- xorl 0(%rsp),%edx
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 20(%rsp),%edx
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%edx
- movl %edx,32(%rsp)
- movl 36(%rsp),%ebp
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 44(%rsp),%ebp
- xorl %r11d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%rsi,1),%esi
- xorl 4(%rsp),%ebp
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 24(%rsp),%ebp
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%ebp
- movl %ebp,36(%rsp)
- movl 40(%rsp),%edx
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 48(%rsp),%edx
- xorl %edi,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r13,1),%r13d
- xorl 8(%rsp),%edx
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 28(%rsp),%edx
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%edx
- movl %edx,40(%rsp)
- movl 44(%rsp),%ebp
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 52(%rsp),%ebp
- xorl %esi,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r12,1),%r12d
- xorl 12(%rsp),%ebp
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 32(%rsp),%ebp
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%ebp
- movl %ebp,44(%rsp)
- movl 48(%rsp),%edx
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 56(%rsp),%edx
- xorl %r13d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r11,1),%r11d
- xorl 16(%rsp),%edx
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 36(%rsp),%edx
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%edx
- movl %edx,48(%rsp)
- movl 52(%rsp),%ebp
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 60(%rsp),%ebp
- xorl %r12d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%rdi,1),%edi
- xorl 20(%rsp),%ebp
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 40(%rsp),%ebp
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%ebp
- movl %ebp,52(%rsp)
- movl 56(%rsp),%edx
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 0(%rsp),%edx
- xorl %r11d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%rsi,1),%esi
- xorl 24(%rsp),%edx
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 44(%rsp),%edx
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%edx
- movl %edx,56(%rsp)
- movl 60(%rsp),%ebp
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 4(%rsp),%ebp
- xorl %edi,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r13,1),%r13d
- xorl 28(%rsp),%ebp
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 48(%rsp),%ebp
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%ebp
- movl %ebp,60(%rsp)
- movl 0(%rsp),%edx
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 8(%rsp),%edx
- xorl %esi,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r12,1),%r12d
- xorl 32(%rsp),%edx
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 52(%rsp),%edx
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%edx
- movl %edx,0(%rsp)
- movl 4(%rsp),%ebp
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 12(%rsp),%ebp
- xorl %r13d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r11,1),%r11d
- xorl 36(%rsp),%ebp
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 56(%rsp),%ebp
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%ebp
- movl %ebp,4(%rsp)
- movl 8(%rsp),%edx
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 16(%rsp),%edx
- xorl %r12d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%rdi,1),%edi
- xorl 40(%rsp),%edx
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 60(%rsp),%edx
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%edx
- movl %edx,8(%rsp)
- movl 12(%rsp),%ebp
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 20(%rsp),%ebp
- xorl %r11d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%rsi,1),%esi
- xorl 44(%rsp),%ebp
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 0(%rsp),%ebp
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%ebp
- movl %ebp,12(%rsp)
- movl 16(%rsp),%edx
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 24(%rsp),%edx
- xorl %edi,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r13,1),%r13d
- xorl 48(%rsp),%edx
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 4(%rsp),%edx
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%edx
- movl %edx,16(%rsp)
- movl 20(%rsp),%ebp
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 28(%rsp),%ebp
- xorl %esi,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%r12,1),%r12d
- xorl 52(%rsp),%ebp
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 8(%rsp),%ebp
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%ebp
- movl %ebp,20(%rsp)
- movl 24(%rsp),%edx
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 32(%rsp),%edx
- xorl %r13d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%r11,1),%r11d
- xorl 56(%rsp),%edx
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 12(%rsp),%edx
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%edx
- movl %edx,24(%rsp)
- movl 28(%rsp),%ebp
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 36(%rsp),%ebp
- xorl %r12d,%eax
- roll $5,%ecx
- leal 1859775393(%rdx,%rdi,1),%edi
- xorl 60(%rsp),%ebp
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 16(%rsp),%ebp
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%ebp
- movl %ebp,28(%rsp)
- movl 32(%rsp),%edx
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 40(%rsp),%edx
- xorl %r11d,%eax
- roll $5,%ecx
- leal 1859775393(%rbp,%rsi,1),%esi
- xorl 0(%rsp),%edx
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 20(%rsp),%edx
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%edx
- movl %edx,32(%rsp)
- movl 36(%rsp),%ebp
- movl %r11d,%eax
- movl %r11d,%ebx
- xorl 44(%rsp),%ebp
- andl %r12d,%eax
- movl %esi,%ecx
- xorl 4(%rsp),%ebp
- xorl %r12d,%ebx
- leal -1894007588(%rdx,%r13,1),%r13d
- roll $5,%ecx
- xorl 24(%rsp),%ebp
- addl %eax,%r13d
- andl %edi,%ebx
- roll $1,%ebp
- addl %ebx,%r13d
- roll $30,%edi
- movl %ebp,36(%rsp)
- addl %ecx,%r13d
- movl 40(%rsp),%edx
- movl %edi,%eax
- movl %edi,%ebx
- xorl 48(%rsp),%edx
- andl %r11d,%eax
- movl %r13d,%ecx
- xorl 8(%rsp),%edx
- xorl %r11d,%ebx
- leal -1894007588(%rbp,%r12,1),%r12d
- roll $5,%ecx
- xorl 28(%rsp),%edx
- addl %eax,%r12d
- andl %esi,%ebx
- roll $1,%edx
- addl %ebx,%r12d
- roll $30,%esi
- movl %edx,40(%rsp)
- addl %ecx,%r12d
- movl 44(%rsp),%ebp
- movl %esi,%eax
- movl %esi,%ebx
- xorl 52(%rsp),%ebp
- andl %edi,%eax
- movl %r12d,%ecx
- xorl 12(%rsp),%ebp
- xorl %edi,%ebx
- leal -1894007588(%rdx,%r11,1),%r11d
- roll $5,%ecx
- xorl 32(%rsp),%ebp
- addl %eax,%r11d
- andl %r13d,%ebx
- roll $1,%ebp
- addl %ebx,%r11d
- roll $30,%r13d
- movl %ebp,44(%rsp)
- addl %ecx,%r11d
- movl 48(%rsp),%edx
- movl %r13d,%eax
- movl %r13d,%ebx
- xorl 56(%rsp),%edx
- andl %esi,%eax
- movl %r11d,%ecx
- xorl 16(%rsp),%edx
- xorl %esi,%ebx
- leal -1894007588(%rbp,%rdi,1),%edi
- roll $5,%ecx
- xorl 36(%rsp),%edx
- addl %eax,%edi
- andl %r12d,%ebx
- roll $1,%edx
- addl %ebx,%edi
- roll $30,%r12d
- movl %edx,48(%rsp)
- addl %ecx,%edi
- movl 52(%rsp),%ebp
- movl %r12d,%eax
- movl %r12d,%ebx
- xorl 60(%rsp),%ebp
- andl %r13d,%eax
- movl %edi,%ecx
- xorl 20(%rsp),%ebp
- xorl %r13d,%ebx
- leal -1894007588(%rdx,%rsi,1),%esi
- roll $5,%ecx
- xorl 40(%rsp),%ebp
- addl %eax,%esi
- andl %r11d,%ebx
- roll $1,%ebp
- addl %ebx,%esi
- roll $30,%r11d
- movl %ebp,52(%rsp)
- addl %ecx,%esi
- movl 56(%rsp),%edx
- movl %r11d,%eax
- movl %r11d,%ebx
- xorl 0(%rsp),%edx
- andl %r12d,%eax
- movl %esi,%ecx
- xorl 24(%rsp),%edx
- xorl %r12d,%ebx
- leal -1894007588(%rbp,%r13,1),%r13d
- roll $5,%ecx
- xorl 44(%rsp),%edx
- addl %eax,%r13d
- andl %edi,%ebx
- roll $1,%edx
- addl %ebx,%r13d
- roll $30,%edi
- movl %edx,56(%rsp)
- addl %ecx,%r13d
- movl 60(%rsp),%ebp
- movl %edi,%eax
- movl %edi,%ebx
- xorl 4(%rsp),%ebp
- andl %r11d,%eax
- movl %r13d,%ecx
- xorl 28(%rsp),%ebp
- xorl %r11d,%ebx
- leal -1894007588(%rdx,%r12,1),%r12d
- roll $5,%ecx
- xorl 48(%rsp),%ebp
- addl %eax,%r12d
- andl %esi,%ebx
- roll $1,%ebp
- addl %ebx,%r12d
- roll $30,%esi
- movl %ebp,60(%rsp)
- addl %ecx,%r12d
- movl 0(%rsp),%edx
- movl %esi,%eax
- movl %esi,%ebx
- xorl 8(%rsp),%edx
- andl %edi,%eax
- movl %r12d,%ecx
- xorl 32(%rsp),%edx
- xorl %edi,%ebx
- leal -1894007588(%rbp,%r11,1),%r11d
- roll $5,%ecx
- xorl 52(%rsp),%edx
- addl %eax,%r11d
- andl %r13d,%ebx
- roll $1,%edx
- addl %ebx,%r11d
- roll $30,%r13d
- movl %edx,0(%rsp)
- addl %ecx,%r11d
- movl 4(%rsp),%ebp
- movl %r13d,%eax
- movl %r13d,%ebx
- xorl 12(%rsp),%ebp
- andl %esi,%eax
- movl %r11d,%ecx
- xorl 36(%rsp),%ebp
- xorl %esi,%ebx
- leal -1894007588(%rdx,%rdi,1),%edi
- roll $5,%ecx
- xorl 56(%rsp),%ebp
- addl %eax,%edi
- andl %r12d,%ebx
- roll $1,%ebp
- addl %ebx,%edi
- roll $30,%r12d
- movl %ebp,4(%rsp)
- addl %ecx,%edi
- movl 8(%rsp),%edx
- movl %r12d,%eax
- movl %r12d,%ebx
- xorl 16(%rsp),%edx
- andl %r13d,%eax
- movl %edi,%ecx
- xorl 40(%rsp),%edx
- xorl %r13d,%ebx
- leal -1894007588(%rbp,%rsi,1),%esi
- roll $5,%ecx
- xorl 60(%rsp),%edx
- addl %eax,%esi
- andl %r11d,%ebx
- roll $1,%edx
- addl %ebx,%esi
- roll $30,%r11d
- movl %edx,8(%rsp)
- addl %ecx,%esi
- movl 12(%rsp),%ebp
- movl %r11d,%eax
- movl %r11d,%ebx
- xorl 20(%rsp),%ebp
- andl %r12d,%eax
- movl %esi,%ecx
- xorl 44(%rsp),%ebp
- xorl %r12d,%ebx
- leal -1894007588(%rdx,%r13,1),%r13d
- roll $5,%ecx
- xorl 0(%rsp),%ebp
- addl %eax,%r13d
- andl %edi,%ebx
- roll $1,%ebp
- addl %ebx,%r13d
- roll $30,%edi
- movl %ebp,12(%rsp)
- addl %ecx,%r13d
- movl 16(%rsp),%edx
- movl %edi,%eax
- movl %edi,%ebx
- xorl 24(%rsp),%edx
- andl %r11d,%eax
- movl %r13d,%ecx
- xorl 48(%rsp),%edx
- xorl %r11d,%ebx
- leal -1894007588(%rbp,%r12,1),%r12d
- roll $5,%ecx
- xorl 4(%rsp),%edx
- addl %eax,%r12d
- andl %esi,%ebx
- roll $1,%edx
- addl %ebx,%r12d
- roll $30,%esi
- movl %edx,16(%rsp)
- addl %ecx,%r12d
- movl 20(%rsp),%ebp
- movl %esi,%eax
- movl %esi,%ebx
- xorl 28(%rsp),%ebp
- andl %edi,%eax
- movl %r12d,%ecx
- xorl 52(%rsp),%ebp
- xorl %edi,%ebx
- leal -1894007588(%rdx,%r11,1),%r11d
- roll $5,%ecx
- xorl 8(%rsp),%ebp
- addl %eax,%r11d
- andl %r13d,%ebx
- roll $1,%ebp
- addl %ebx,%r11d
- roll $30,%r13d
- movl %ebp,20(%rsp)
- addl %ecx,%r11d
- movl 24(%rsp),%edx
- movl %r13d,%eax
- movl %r13d,%ebx
- xorl 32(%rsp),%edx
- andl %esi,%eax
- movl %r11d,%ecx
- xorl 56(%rsp),%edx
- xorl %esi,%ebx
- leal -1894007588(%rbp,%rdi,1),%edi
- roll $5,%ecx
- xorl 12(%rsp),%edx
- addl %eax,%edi
- andl %r12d,%ebx
- roll $1,%edx
- addl %ebx,%edi
- roll $30,%r12d
- movl %edx,24(%rsp)
- addl %ecx,%edi
- movl 28(%rsp),%ebp
- movl %r12d,%eax
- movl %r12d,%ebx
- xorl 36(%rsp),%ebp
- andl %r13d,%eax
- movl %edi,%ecx
- xorl 60(%rsp),%ebp
- xorl %r13d,%ebx
- leal -1894007588(%rdx,%rsi,1),%esi
- roll $5,%ecx
- xorl 16(%rsp),%ebp
- addl %eax,%esi
- andl %r11d,%ebx
- roll $1,%ebp
- addl %ebx,%esi
- roll $30,%r11d
- movl %ebp,28(%rsp)
- addl %ecx,%esi
- movl 32(%rsp),%edx
- movl %r11d,%eax
- movl %r11d,%ebx
- xorl 40(%rsp),%edx
- andl %r12d,%eax
- movl %esi,%ecx
- xorl 0(%rsp),%edx
- xorl %r12d,%ebx
- leal -1894007588(%rbp,%r13,1),%r13d
- roll $5,%ecx
- xorl 20(%rsp),%edx
- addl %eax,%r13d
- andl %edi,%ebx
- roll $1,%edx
- addl %ebx,%r13d
- roll $30,%edi
- movl %edx,32(%rsp)
- addl %ecx,%r13d
- movl 36(%rsp),%ebp
- movl %edi,%eax
- movl %edi,%ebx
- xorl 44(%rsp),%ebp
- andl %r11d,%eax
- movl %r13d,%ecx
- xorl 4(%rsp),%ebp
- xorl %r11d,%ebx
- leal -1894007588(%rdx,%r12,1),%r12d
- roll $5,%ecx
- xorl 24(%rsp),%ebp
- addl %eax,%r12d
- andl %esi,%ebx
- roll $1,%ebp
- addl %ebx,%r12d
- roll $30,%esi
- movl %ebp,36(%rsp)
- addl %ecx,%r12d
- movl 40(%rsp),%edx
- movl %esi,%eax
- movl %esi,%ebx
- xorl 48(%rsp),%edx
- andl %edi,%eax
- movl %r12d,%ecx
- xorl 8(%rsp),%edx
- xorl %edi,%ebx
- leal -1894007588(%rbp,%r11,1),%r11d
- roll $5,%ecx
- xorl 28(%rsp),%edx
- addl %eax,%r11d
- andl %r13d,%ebx
- roll $1,%edx
- addl %ebx,%r11d
- roll $30,%r13d
- movl %edx,40(%rsp)
- addl %ecx,%r11d
- movl 44(%rsp),%ebp
- movl %r13d,%eax
- movl %r13d,%ebx
- xorl 52(%rsp),%ebp
- andl %esi,%eax
- movl %r11d,%ecx
- xorl 12(%rsp),%ebp
- xorl %esi,%ebx
- leal -1894007588(%rdx,%rdi,1),%edi
- roll $5,%ecx
- xorl 32(%rsp),%ebp
- addl %eax,%edi
- andl %r12d,%ebx
- roll $1,%ebp
- addl %ebx,%edi
- roll $30,%r12d
- movl %ebp,44(%rsp)
- addl %ecx,%edi
- movl 48(%rsp),%edx
- movl %r12d,%eax
- movl %r12d,%ebx
- xorl 56(%rsp),%edx
- andl %r13d,%eax
- movl %edi,%ecx
- xorl 16(%rsp),%edx
- xorl %r13d,%ebx
- leal -1894007588(%rbp,%rsi,1),%esi
- roll $5,%ecx
- xorl 36(%rsp),%edx
- addl %eax,%esi
- andl %r11d,%ebx
- roll $1,%edx
- addl %ebx,%esi
- roll $30,%r11d
- movl %edx,48(%rsp)
- addl %ecx,%esi
- movl 52(%rsp),%ebp
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 60(%rsp),%ebp
- xorl %edi,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r13,1),%r13d
- xorl 20(%rsp),%ebp
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 40(%rsp),%ebp
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%ebp
- movl %ebp,52(%rsp)
- movl 56(%rsp),%edx
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 0(%rsp),%edx
- xorl %esi,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r12,1),%r12d
- xorl 24(%rsp),%edx
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 44(%rsp),%edx
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%edx
- movl %edx,56(%rsp)
- movl 60(%rsp),%ebp
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 4(%rsp),%ebp
- xorl %r13d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r11,1),%r11d
- xorl 28(%rsp),%ebp
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 48(%rsp),%ebp
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%ebp
- movl %ebp,60(%rsp)
- movl 0(%rsp),%edx
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 8(%rsp),%edx
- xorl %r12d,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%rdi,1),%edi
- xorl 32(%rsp),%edx
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 52(%rsp),%edx
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%edx
- movl %edx,0(%rsp)
- movl 4(%rsp),%ebp
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 12(%rsp),%ebp
- xorl %r11d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%rsi,1),%esi
- xorl 36(%rsp),%ebp
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 56(%rsp),%ebp
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%ebp
- movl %ebp,4(%rsp)
- movl 8(%rsp),%edx
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 16(%rsp),%edx
- xorl %edi,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r13,1),%r13d
- xorl 40(%rsp),%edx
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 60(%rsp),%edx
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%edx
- movl %edx,8(%rsp)
- movl 12(%rsp),%ebp
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 20(%rsp),%ebp
- xorl %esi,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r12,1),%r12d
- xorl 44(%rsp),%ebp
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 0(%rsp),%ebp
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%ebp
- movl %ebp,12(%rsp)
- movl 16(%rsp),%edx
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 24(%rsp),%edx
- xorl %r13d,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r11,1),%r11d
- xorl 48(%rsp),%edx
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 4(%rsp),%edx
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%edx
- movl %edx,16(%rsp)
- movl 20(%rsp),%ebp
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 28(%rsp),%ebp
- xorl %r12d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%rdi,1),%edi
- xorl 52(%rsp),%ebp
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 8(%rsp),%ebp
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%ebp
- movl %ebp,20(%rsp)
- movl 24(%rsp),%edx
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 32(%rsp),%edx
- xorl %r11d,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%rsi,1),%esi
- xorl 56(%rsp),%edx
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 12(%rsp),%edx
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%edx
- movl %edx,24(%rsp)
- movl 28(%rsp),%ebp
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 36(%rsp),%ebp
- xorl %edi,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r13,1),%r13d
- xorl 60(%rsp),%ebp
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 16(%rsp),%ebp
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%ebp
- movl %ebp,28(%rsp)
- movl 32(%rsp),%edx
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 40(%rsp),%edx
- xorl %esi,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r12,1),%r12d
- xorl 0(%rsp),%edx
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 20(%rsp),%edx
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%edx
- movl %edx,32(%rsp)
- movl 36(%rsp),%ebp
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 44(%rsp),%ebp
- xorl %r13d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r11,1),%r11d
- xorl 4(%rsp),%ebp
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 24(%rsp),%ebp
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%ebp
- movl %ebp,36(%rsp)
- movl 40(%rsp),%edx
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 48(%rsp),%edx
- xorl %r12d,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%rdi,1),%edi
- xorl 8(%rsp),%edx
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 28(%rsp),%edx
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%edx
- movl %edx,40(%rsp)
- movl 44(%rsp),%ebp
- movl %r12d,%eax
- movl %edi,%ecx
- xorl 52(%rsp),%ebp
- xorl %r11d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%rsi,1),%esi
- xorl 12(%rsp),%ebp
- xorl %r13d,%eax
- addl %ecx,%esi
- xorl 32(%rsp),%ebp
- roll $30,%r11d
- addl %eax,%esi
- roll $1,%ebp
- movl %ebp,44(%rsp)
- movl 48(%rsp),%edx
- movl %r11d,%eax
- movl %esi,%ecx
- xorl 56(%rsp),%edx
- xorl %edi,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r13,1),%r13d
- xorl 16(%rsp),%edx
- xorl %r12d,%eax
- addl %ecx,%r13d
- xorl 36(%rsp),%edx
- roll $30,%edi
- addl %eax,%r13d
- roll $1,%edx
- movl %edx,48(%rsp)
- movl 52(%rsp),%ebp
- movl %edi,%eax
- movl %r13d,%ecx
- xorl 60(%rsp),%ebp
- xorl %esi,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%r12,1),%r12d
- xorl 20(%rsp),%ebp
- xorl %r11d,%eax
- addl %ecx,%r12d
- xorl 40(%rsp),%ebp
- roll $30,%esi
- addl %eax,%r12d
- roll $1,%ebp
- movl 56(%rsp),%edx
- movl %esi,%eax
- movl %r12d,%ecx
- xorl 0(%rsp),%edx
- xorl %r13d,%eax
- roll $5,%ecx
- leal -899497514(%rbp,%r11,1),%r11d
- xorl 24(%rsp),%edx
- xorl %edi,%eax
- addl %ecx,%r11d
- xorl 44(%rsp),%edx
- roll $30,%r13d
- addl %eax,%r11d
- roll $1,%edx
- movl 60(%rsp),%ebp
- movl %r13d,%eax
- movl %r11d,%ecx
- xorl 4(%rsp),%ebp
- xorl %r12d,%eax
- roll $5,%ecx
- leal -899497514(%rdx,%rdi,1),%edi
- xorl 28(%rsp),%ebp
- xorl %esi,%eax
- addl %ecx,%edi
- xorl 48(%rsp),%ebp
- roll $30,%r12d
- addl %eax,%edi
- roll $1,%ebp
- movl %r12d,%eax
- movl %edi,%ecx
- xorl %r11d,%eax
- leal -899497514(%rbp,%rsi,1),%esi
- roll $5,%ecx
- xorl %r13d,%eax
- addl %ecx,%esi
- roll $30,%r11d
- addl %eax,%esi
- addl 0(%r8),%esi
- addl 4(%r8),%edi
- addl 8(%r8),%r11d
- addl 12(%r8),%r12d
- addl 16(%r8),%r13d
- movl %esi,0(%r8)
- movl %edi,4(%r8)
- movl %r11d,8(%r8)
- movl %r12d,12(%r8)
- movl %r13d,16(%r8)
-
- subq $1,%r10
- leaq 64(%r9),%r9
- jnz .Lloop
-
- movq 64(%rsp),%rsi
- movq (%rsi),%r13
- movq 8(%rsi),%r12
- movq 16(%rsi),%rbp
- movq 24(%rsi),%rbx
- leaq 32(%rsi),%rsp
-.Lepilogue:
- .byte 0xf3,0xc3
-.size sha1_block_data_order,.-sha1_block_data_order
-.type sha1_block_data_order_ssse3,@function
-.align 16
-sha1_block_data_order_ssse3:
-_ssse3_shortcut:
- pushq %rbx
- pushq %rbp
- pushq %r12
- leaq -64(%rsp),%rsp
- movq %rdi,%r8
- movq %rsi,%r9
- movq %rdx,%r10
-
- shlq $6,%r10
- addq %r9,%r10
- leaq K_XX_XX(%rip),%r11
-
- movl 0(%r8),%eax
- movl 4(%r8),%ebx
- movl 8(%r8),%ecx
- movl 12(%r8),%edx
- movl %ebx,%esi
- movl 16(%r8),%ebp
-
- movdqa 64(%r11),%xmm6
- movdqa 0(%r11),%xmm9
- movdqu 0(%r9),%xmm0
- movdqu 16(%r9),%xmm1
- movdqu 32(%r9),%xmm2
- movdqu 48(%r9),%xmm3
-.byte 102,15,56,0,198
- addq $64,%r9
-.byte 102,15,56,0,206
-.byte 102,15,56,0,214
-.byte 102,15,56,0,222
- paddd %xmm9,%xmm0
- paddd %xmm9,%xmm1
- paddd %xmm9,%xmm2
- movdqa %xmm0,0(%rsp)
- psubd %xmm9,%xmm0
- movdqa %xmm1,16(%rsp)
- psubd %xmm9,%xmm1
- movdqa %xmm2,32(%rsp)
- psubd %xmm9,%xmm2
- jmp .Loop_ssse3
-.align 16
-.Loop_ssse3:
- movdqa %xmm1,%xmm4
- addl 0(%rsp),%ebp
- xorl %edx,%ecx
- movdqa %xmm3,%xmm8
-.byte 102,15,58,15,224,8
- movl %eax,%edi
- roll $5,%eax
- paddd %xmm3,%xmm9
- andl %ecx,%esi
- xorl %edx,%ecx
- psrldq $4,%xmm8
- xorl %edx,%esi
- addl %eax,%ebp
- pxor %xmm0,%xmm4
- rorl $2,%ebx
- addl %esi,%ebp
- pxor %xmm2,%xmm8
- addl 4(%rsp),%edx
- xorl %ecx,%ebx
- movl %ebp,%esi
- roll $5,%ebp
- pxor %xmm8,%xmm4
- andl %ebx,%edi
- xorl %ecx,%ebx
- movdqa %xmm9,48(%rsp)
- xorl %ecx,%edi
- addl %ebp,%edx
- movdqa %xmm4,%xmm10
- movdqa %xmm4,%xmm8
- rorl $7,%eax
- addl %edi,%edx
- addl 8(%rsp),%ecx
- xorl %ebx,%eax
- pslldq $12,%xmm10
- paddd %xmm4,%xmm4
- movl %edx,%edi
- roll $5,%edx
- andl %eax,%esi
- xorl %ebx,%eax
- psrld $31,%xmm8
- xorl %ebx,%esi
- addl %edx,%ecx
- movdqa %xmm10,%xmm9
- rorl $7,%ebp
- addl %esi,%ecx
- psrld $30,%xmm10
- por %xmm8,%xmm4
- addl 12(%rsp),%ebx
- xorl %eax,%ebp
- movl %ecx,%esi
- roll $5,%ecx
- pslld $2,%xmm9
- pxor %xmm10,%xmm4
- andl %ebp,%edi
- xorl %eax,%ebp
- movdqa 0(%r11),%xmm10
- xorl %eax,%edi
- addl %ecx,%ebx
- pxor %xmm9,%xmm4
- rorl $7,%edx
- addl %edi,%ebx
- movdqa %xmm2,%xmm5
- addl 16(%rsp),%eax
- xorl %ebp,%edx
- movdqa %xmm4,%xmm9
-.byte 102,15,58,15,233,8
- movl %ebx,%edi
- roll $5,%ebx
- paddd %xmm4,%xmm10
- andl %edx,%esi
- xorl %ebp,%edx
- psrldq $4,%xmm9
- xorl %ebp,%esi
- addl %ebx,%eax
- pxor %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
- pxor %xmm3,%xmm9
- addl 20(%rsp),%ebp
- xorl %edx,%ecx
- movl %eax,%esi
- roll $5,%eax
- pxor %xmm9,%xmm5
- andl %ecx,%edi
- xorl %edx,%ecx
- movdqa %xmm10,0(%rsp)
- xorl %edx,%edi
- addl %eax,%ebp
- movdqa %xmm5,%xmm8
- movdqa %xmm5,%xmm9
- rorl $7,%ebx
- addl %edi,%ebp
- addl 24(%rsp),%edx
- xorl %ecx,%ebx
- pslldq $12,%xmm8
- paddd %xmm5,%xmm5
- movl %ebp,%edi
- roll $5,%ebp
- andl %ebx,%esi
- xorl %ecx,%ebx
- psrld $31,%xmm9
- xorl %ecx,%esi
- addl %ebp,%edx
- movdqa %xmm8,%xmm10
- rorl $7,%eax
- addl %esi,%edx
- psrld $30,%xmm8
- por %xmm9,%xmm5
- addl 28(%rsp),%ecx
- xorl %ebx,%eax
- movl %edx,%esi
- roll $5,%edx
- pslld $2,%xmm10
- pxor %xmm8,%xmm5
- andl %eax,%edi
- xorl %ebx,%eax
- movdqa 16(%r11),%xmm8
- xorl %ebx,%edi
- addl %edx,%ecx
- pxor %xmm10,%xmm5
- rorl $7,%ebp
- addl %edi,%ecx
- movdqa %xmm3,%xmm6
- addl 32(%rsp),%ebx
- xorl %eax,%ebp
- movdqa %xmm5,%xmm10
-.byte 102,15,58,15,242,8
- movl %ecx,%edi
- roll $5,%ecx
- paddd %xmm5,%xmm8
- andl %ebp,%esi
- xorl %eax,%ebp
- psrldq $4,%xmm10
- xorl %eax,%esi
- addl %ecx,%ebx
- pxor %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
- pxor %xmm4,%xmm10
- addl 36(%rsp),%eax
- xorl %ebp,%edx
- movl %ebx,%esi
- roll $5,%ebx
- pxor %xmm10,%xmm6
- andl %edx,%edi
- xorl %ebp,%edx
- movdqa %xmm8,16(%rsp)
- xorl %ebp,%edi
- addl %ebx,%eax
- movdqa %xmm6,%xmm9
- movdqa %xmm6,%xmm10
- rorl $7,%ecx
- addl %edi,%eax
- addl 40(%rsp),%ebp
- xorl %edx,%ecx
- pslldq $12,%xmm9
- paddd %xmm6,%xmm6
- movl %eax,%edi
- roll $5,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
- psrld $31,%xmm10
- xorl %edx,%esi
- addl %eax,%ebp
- movdqa %xmm9,%xmm8
- rorl $7,%ebx
- addl %esi,%ebp
- psrld $30,%xmm9
- por %xmm10,%xmm6
- addl 44(%rsp),%edx
- xorl %ecx,%ebx
- movl %ebp,%esi
- roll $5,%ebp
- pslld $2,%xmm8
- pxor %xmm9,%xmm6
- andl %ebx,%edi
- xorl %ecx,%ebx
- movdqa 16(%r11),%xmm9
- xorl %ecx,%edi
- addl %ebp,%edx
- pxor %xmm8,%xmm6
- rorl $7,%eax
- addl %edi,%edx
- movdqa %xmm4,%xmm7
- addl 48(%rsp),%ecx
- xorl %ebx,%eax
- movdqa %xmm6,%xmm8
-.byte 102,15,58,15,251,8
- movl %edx,%edi
- roll $5,%edx
- paddd %xmm6,%xmm9
- andl %eax,%esi
- xorl %ebx,%eax
- psrldq $4,%xmm8
- xorl %ebx,%esi
- addl %edx,%ecx
- pxor %xmm3,%xmm7
- rorl $7,%ebp
- addl %esi,%ecx
- pxor %xmm5,%xmm8
- addl 52(%rsp),%ebx
- xorl %eax,%ebp
- movl %ecx,%esi
- roll $5,%ecx
- pxor %xmm8,%xmm7
- andl %ebp,%edi
- xorl %eax,%ebp
- movdqa %xmm9,32(%rsp)
- xorl %eax,%edi
- addl %ecx,%ebx
- movdqa %xmm7,%xmm10
- movdqa %xmm7,%xmm8
- rorl $7,%edx
- addl %edi,%ebx
- addl 56(%rsp),%eax
- xorl %ebp,%edx
- pslldq $12,%xmm10
- paddd %xmm7,%xmm7
- movl %ebx,%edi
- roll $5,%ebx
- andl %edx,%esi
- xorl %ebp,%edx
- psrld $31,%xmm8
- xorl %ebp,%esi
- addl %ebx,%eax
- movdqa %xmm10,%xmm9
- rorl $7,%ecx
- addl %esi,%eax
- psrld $30,%xmm10
- por %xmm8,%xmm7
- addl 60(%rsp),%ebp
- xorl %edx,%ecx
- movl %eax,%esi
- roll $5,%eax
- pslld $2,%xmm9
- pxor %xmm10,%xmm7
- andl %ecx,%edi
- xorl %edx,%ecx
- movdqa 16(%r11),%xmm10
- xorl %edx,%edi
- addl %eax,%ebp
- pxor %xmm9,%xmm7
- rorl $7,%ebx
- addl %edi,%ebp
- movdqa %xmm7,%xmm9
- addl 0(%rsp),%edx
- pxor %xmm4,%xmm0
-.byte 102,68,15,58,15,206,8
- xorl %ecx,%ebx
- movl %ebp,%edi
- roll $5,%ebp
- pxor %xmm1,%xmm0
- andl %ebx,%esi
- xorl %ecx,%ebx
- movdqa %xmm10,%xmm8
- paddd %xmm7,%xmm10
- xorl %ecx,%esi
- addl %ebp,%edx
- pxor %xmm9,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- addl 4(%rsp),%ecx
- xorl %ebx,%eax
- movdqa %xmm0,%xmm9
- movdqa %xmm10,48(%rsp)
- movl %edx,%esi
- roll $5,%edx
- andl %eax,%edi
- xorl %ebx,%eax
- pslld $2,%xmm0
- xorl %ebx,%edi
- addl %edx,%ecx
- psrld $30,%xmm9
- rorl $7,%ebp
- addl %edi,%ecx
- addl 8(%rsp),%ebx
- xorl %eax,%ebp
- movl %ecx,%edi
- roll $5,%ecx
- por %xmm9,%xmm0
- andl %ebp,%esi
- xorl %eax,%ebp
- movdqa %xmm0,%xmm10
- xorl %eax,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 12(%rsp),%eax
- xorl %ebp,%edx
- movl %ebx,%esi
- roll $5,%ebx
- andl %edx,%edi
- xorl %ebp,%edx
- xorl %ebp,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- addl 16(%rsp),%ebp
- pxor %xmm5,%xmm1
-.byte 102,68,15,58,15,215,8
- xorl %edx,%esi
- movl %eax,%edi
- roll $5,%eax
- pxor %xmm2,%xmm1
- xorl %ecx,%esi
- addl %eax,%ebp
- movdqa %xmm8,%xmm9
- paddd %xmm0,%xmm8
- rorl $7,%ebx
- addl %esi,%ebp
- pxor %xmm10,%xmm1
- addl 20(%rsp),%edx
- xorl %ecx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- movdqa %xmm1,%xmm10
- movdqa %xmm8,0(%rsp)
- xorl %ebx,%edi
- addl %ebp,%edx
- rorl $7,%eax
- addl %edi,%edx
- pslld $2,%xmm1
- addl 24(%rsp),%ecx
- xorl %ebx,%esi
- psrld $30,%xmm10
- movl %edx,%edi
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %esi,%ecx
- por %xmm10,%xmm1
- addl 28(%rsp),%ebx
- xorl %eax,%edi
- movdqa %xmm1,%xmm8
- movl %ecx,%esi
- roll $5,%ecx
- xorl %ebp,%edi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %edi,%ebx
- addl 32(%rsp),%eax
- pxor %xmm6,%xmm2
-.byte 102,68,15,58,15,192,8
- xorl %ebp,%esi
- movl %ebx,%edi
- roll $5,%ebx
- pxor %xmm3,%xmm2
- xorl %edx,%esi
- addl %ebx,%eax
- movdqa 32(%r11),%xmm10
- paddd %xmm1,%xmm9
- rorl $7,%ecx
- addl %esi,%eax
- pxor %xmm8,%xmm2
- addl 36(%rsp),%ebp
- xorl %edx,%edi
- movl %eax,%esi
- roll $5,%eax
- movdqa %xmm2,%xmm8
- movdqa %xmm9,16(%rsp)
- xorl %ecx,%edi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %edi,%ebp
- pslld $2,%xmm2
- addl 40(%rsp),%edx
- xorl %ecx,%esi
- psrld $30,%xmm8
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%esi
- addl %ebp,%edx
- rorl $7,%eax
- addl %esi,%edx
- por %xmm8,%xmm2
- addl 44(%rsp),%ecx
- xorl %ebx,%edi
- movdqa %xmm2,%xmm9
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%edi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %edi,%ecx
- addl 48(%rsp),%ebx
- pxor %xmm7,%xmm3
-.byte 102,68,15,58,15,201,8
- xorl %eax,%esi
- movl %ecx,%edi
- roll $5,%ecx
- pxor %xmm4,%xmm3
- xorl %ebp,%esi
- addl %ecx,%ebx
- movdqa %xmm10,%xmm8
- paddd %xmm2,%xmm10
- rorl $7,%edx
- addl %esi,%ebx
- pxor %xmm9,%xmm3
- addl 52(%rsp),%eax
- xorl %ebp,%edi
- movl %ebx,%esi
- roll $5,%ebx
- movdqa %xmm3,%xmm9
- movdqa %xmm10,32(%rsp)
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- pslld $2,%xmm3
- addl 56(%rsp),%ebp
- xorl %edx,%esi
- psrld $30,%xmm9
- movl %eax,%edi
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %esi,%ebp
- por %xmm9,%xmm3
- addl 60(%rsp),%edx
- xorl %ecx,%edi
- movdqa %xmm3,%xmm10
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ebx,%edi
- addl %ebp,%edx
- rorl $7,%eax
- addl %edi,%edx
- addl 0(%rsp),%ecx
- pxor %xmm0,%xmm4
-.byte 102,68,15,58,15,210,8
- xorl %ebx,%esi
- movl %edx,%edi
- roll $5,%edx
- pxor %xmm5,%xmm4
- xorl %eax,%esi
- addl %edx,%ecx
- movdqa %xmm8,%xmm9
- paddd %xmm3,%xmm8
- rorl $7,%ebp
- addl %esi,%ecx
- pxor %xmm10,%xmm4
- addl 4(%rsp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
- roll $5,%ecx
- movdqa %xmm4,%xmm10
- movdqa %xmm8,48(%rsp)
- xorl %ebp,%edi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %edi,%ebx
- pslld $2,%xmm4
- addl 8(%rsp),%eax
- xorl %ebp,%esi
- psrld $30,%xmm10
- movl %ebx,%edi
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- por %xmm10,%xmm4
- addl 12(%rsp),%ebp
- xorl %edx,%edi
- movdqa %xmm4,%xmm8
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%edi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %edi,%ebp
- addl 16(%rsp),%edx
- pxor %xmm1,%xmm5
-.byte 102,68,15,58,15,195,8
- xorl %ecx,%esi
- movl %ebp,%edi
- roll $5,%ebp
- pxor %xmm6,%xmm5
- xorl %ebx,%esi
- addl %ebp,%edx
- movdqa %xmm9,%xmm10
- paddd %xmm4,%xmm9
- rorl $7,%eax
- addl %esi,%edx
- pxor %xmm8,%xmm5
- addl 20(%rsp),%ecx
- xorl %ebx,%edi
- movl %edx,%esi
- roll $5,%edx
- movdqa %xmm5,%xmm8
- movdqa %xmm9,0(%rsp)
- xorl %eax,%edi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %edi,%ecx
- pslld $2,%xmm5
- addl 24(%rsp),%ebx
- xorl %eax,%esi
- psrld $30,%xmm8
- movl %ecx,%edi
- roll $5,%ecx
- xorl %ebp,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- por %xmm8,%xmm5
- addl 28(%rsp),%eax
- xorl %ebp,%edi
- movdqa %xmm5,%xmm9
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- movl %ecx,%edi
- pxor %xmm2,%xmm6
-.byte 102,68,15,58,15,204,8
- xorl %edx,%ecx
- addl 32(%rsp),%ebp
- andl %edx,%edi
- pxor %xmm7,%xmm6
- andl %ecx,%esi
- rorl $7,%ebx
- movdqa %xmm10,%xmm8
- paddd %xmm5,%xmm10
- addl %edi,%ebp
- movl %eax,%edi
- pxor %xmm9,%xmm6
- roll $5,%eax
- addl %esi,%ebp
- xorl %edx,%ecx
- addl %eax,%ebp
- movdqa %xmm6,%xmm9
- movdqa %xmm10,16(%rsp)
- movl %ebx,%esi
- xorl %ecx,%ebx
- addl 36(%rsp),%edx
- andl %ecx,%esi
- pslld $2,%xmm6
- andl %ebx,%edi
- rorl $7,%eax
- psrld $30,%xmm9
- addl %esi,%edx
- movl %ebp,%esi
- roll $5,%ebp
- addl %edi,%edx
- xorl %ecx,%ebx
- addl %ebp,%edx
- por %xmm9,%xmm6
- movl %eax,%edi
- xorl %ebx,%eax
- movdqa %xmm6,%xmm10
- addl 40(%rsp),%ecx
- andl %ebx,%edi
- andl %eax,%esi
- rorl $7,%ebp
- addl %edi,%ecx
- movl %edx,%edi
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movl %ebp,%esi
- xorl %eax,%ebp
- addl 44(%rsp),%ebx
- andl %eax,%esi
- andl %ebp,%edi
- rorl $7,%edx
- addl %esi,%ebx
- movl %ecx,%esi
- roll $5,%ecx
- addl %edi,%ebx
- xorl %eax,%ebp
- addl %ecx,%ebx
- movl %edx,%edi
- pxor %xmm3,%xmm7
-.byte 102,68,15,58,15,213,8
- xorl %ebp,%edx
- addl 48(%rsp),%eax
- andl %ebp,%edi
- pxor %xmm0,%xmm7
- andl %edx,%esi
- rorl $7,%ecx
- movdqa 48(%r11),%xmm9
- paddd %xmm6,%xmm8
- addl %edi,%eax
- movl %ebx,%edi
- pxor %xmm10,%xmm7
- roll $5,%ebx
- addl %esi,%eax
- xorl %ebp,%edx
- addl %ebx,%eax
- movdqa %xmm7,%xmm10
- movdqa %xmm8,32(%rsp)
- movl %ecx,%esi
- xorl %edx,%ecx
- addl 52(%rsp),%ebp
- andl %edx,%esi
- pslld $2,%xmm7
- andl %ecx,%edi
- rorl $7,%ebx
- psrld $30,%xmm10
- addl %esi,%ebp
- movl %eax,%esi
- roll $5,%eax
- addl %edi,%ebp
- xorl %edx,%ecx
- addl %eax,%ebp
- por %xmm10,%xmm7
- movl %ebx,%edi
- xorl %ecx,%ebx
- movdqa %xmm7,%xmm8
- addl 56(%rsp),%edx
- andl %ecx,%edi
- andl %ebx,%esi
- rorl $7,%eax
- addl %edi,%edx
- movl %ebp,%edi
- roll $5,%ebp
- addl %esi,%edx
- xorl %ecx,%ebx
- addl %ebp,%edx
- movl %eax,%esi
- xorl %ebx,%eax
- addl 60(%rsp),%ecx
- andl %ebx,%esi
- andl %eax,%edi
- rorl $7,%ebp
- addl %esi,%ecx
- movl %edx,%esi
- roll $5,%edx
- addl %edi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movl %ebp,%edi
- pxor %xmm4,%xmm0
-.byte 102,68,15,58,15,198,8
- xorl %eax,%ebp
- addl 0(%rsp),%ebx
- andl %eax,%edi
- pxor %xmm1,%xmm0
- andl %ebp,%esi
- rorl $7,%edx
- movdqa %xmm9,%xmm10
- paddd %xmm7,%xmm9
- addl %edi,%ebx
- movl %ecx,%edi
- pxor %xmm8,%xmm0
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%ebp
- addl %ecx,%ebx
- movdqa %xmm0,%xmm8
- movdqa %xmm9,48(%rsp)
- movl %edx,%esi
- xorl %ebp,%edx
- addl 4(%rsp),%eax
- andl %ebp,%esi
- pslld $2,%xmm0
- andl %edx,%edi
- rorl $7,%ecx
- psrld $30,%xmm8
- addl %esi,%eax
- movl %ebx,%esi
- roll $5,%ebx
- addl %edi,%eax
- xorl %ebp,%edx
- addl %ebx,%eax
- por %xmm8,%xmm0
- movl %ecx,%edi
- xorl %edx,%ecx
- movdqa %xmm0,%xmm9
- addl 8(%rsp),%ebp
- andl %edx,%edi
- andl %ecx,%esi
- rorl $7,%ebx
- addl %edi,%ebp
- movl %eax,%edi
- roll $5,%eax
- addl %esi,%ebp
- xorl %edx,%ecx
- addl %eax,%ebp
- movl %ebx,%esi
- xorl %ecx,%ebx
- addl 12(%rsp),%edx
- andl %ecx,%esi
- andl %ebx,%edi
- rorl $7,%eax
- addl %esi,%edx
- movl %ebp,%esi
- roll $5,%ebp
- addl %edi,%edx
- xorl %ecx,%ebx
- addl %ebp,%edx
- movl %eax,%edi
- pxor %xmm5,%xmm1
-.byte 102,68,15,58,15,207,8
- xorl %ebx,%eax
- addl 16(%rsp),%ecx
- andl %ebx,%edi
- pxor %xmm2,%xmm1
- andl %eax,%esi
- rorl $7,%ebp
- movdqa %xmm10,%xmm8
- paddd %xmm0,%xmm10
- addl %edi,%ecx
- movl %edx,%edi
- pxor %xmm9,%xmm1
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movdqa %xmm1,%xmm9
- movdqa %xmm10,0(%rsp)
- movl %ebp,%esi
- xorl %eax,%ebp
- addl 20(%rsp),%ebx
- andl %eax,%esi
- pslld $2,%xmm1
- andl %ebp,%edi
- rorl $7,%edx
- psrld $30,%xmm9
- addl %esi,%ebx
- movl %ecx,%esi
- roll $5,%ecx
- addl %edi,%ebx
- xorl %eax,%ebp
- addl %ecx,%ebx
- por %xmm9,%xmm1
- movl %edx,%edi
- xorl %ebp,%edx
- movdqa %xmm1,%xmm10
- addl 24(%rsp),%eax
- andl %ebp,%edi
- andl %edx,%esi
- rorl $7,%ecx
- addl %edi,%eax
- movl %ebx,%edi
- roll $5,%ebx
- addl %esi,%eax
- xorl %ebp,%edx
- addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
- addl 28(%rsp),%ebp
- andl %edx,%esi
- andl %ecx,%edi
- rorl $7,%ebx
- addl %esi,%ebp
- movl %eax,%esi
- roll $5,%eax
- addl %edi,%ebp
- xorl %edx,%ecx
- addl %eax,%ebp
- movl %ebx,%edi
- pxor %xmm6,%xmm2
-.byte 102,68,15,58,15,208,8
- xorl %ecx,%ebx
- addl 32(%rsp),%edx
- andl %ecx,%edi
- pxor %xmm3,%xmm2
- andl %ebx,%esi
- rorl $7,%eax
- movdqa %xmm8,%xmm9
- paddd %xmm1,%xmm8
- addl %edi,%edx
- movl %ebp,%edi
- pxor %xmm10,%xmm2
- roll $5,%ebp
- addl %esi,%edx
- xorl %ecx,%ebx
- addl %ebp,%edx
- movdqa %xmm2,%xmm10
- movdqa %xmm8,16(%rsp)
- movl %eax,%esi
- xorl %ebx,%eax
- addl 36(%rsp),%ecx
- andl %ebx,%esi
- pslld $2,%xmm2
- andl %eax,%edi
- rorl $7,%ebp
- psrld $30,%xmm10
- addl %esi,%ecx
- movl %edx,%esi
- roll $5,%edx
- addl %edi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- por %xmm10,%xmm2
- movl %ebp,%edi
- xorl %eax,%ebp
- movdqa %xmm2,%xmm8
- addl 40(%rsp),%ebx
- andl %eax,%edi
- andl %ebp,%esi
- rorl $7,%edx
- addl %edi,%ebx
- movl %ecx,%edi
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%ebp
- addl %ecx,%ebx
- movl %edx,%esi
- xorl %ebp,%edx
- addl 44(%rsp),%eax
- andl %ebp,%esi
- andl %edx,%edi
- rorl $7,%ecx
- addl %esi,%eax
- movl %ebx,%esi
- roll $5,%ebx
- addl %edi,%eax
- xorl %ebp,%edx
- addl %ebx,%eax
- addl 48(%rsp),%ebp
- pxor %xmm7,%xmm3
-.byte 102,68,15,58,15,193,8
- xorl %edx,%esi
- movl %eax,%edi
- roll $5,%eax
- pxor %xmm4,%xmm3
- xorl %ecx,%esi
- addl %eax,%ebp
- movdqa %xmm9,%xmm10
- paddd %xmm2,%xmm9
- rorl $7,%ebx
- addl %esi,%ebp
- pxor %xmm8,%xmm3
- addl 52(%rsp),%edx
- xorl %ecx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- movdqa %xmm3,%xmm8
- movdqa %xmm9,32(%rsp)
- xorl %ebx,%edi
- addl %ebp,%edx
- rorl $7,%eax
- addl %edi,%edx
- pslld $2,%xmm3
- addl 56(%rsp),%ecx
- xorl %ebx,%esi
- psrld $30,%xmm8
- movl %edx,%edi
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %esi,%ecx
- por %xmm8,%xmm3
- addl 60(%rsp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
- roll $5,%ecx
- xorl %ebp,%edi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %edi,%ebx
- addl 0(%rsp),%eax
- paddd %xmm3,%xmm10
- xorl %ebp,%esi
- movl %ebx,%edi
- roll $5,%ebx
- xorl %edx,%esi
- movdqa %xmm10,48(%rsp)
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 4(%rsp),%ebp
- xorl %edx,%edi
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%edi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %edi,%ebp
- addl 8(%rsp),%edx
- xorl %ecx,%esi
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%esi
- addl %ebp,%edx
- rorl $7,%eax
- addl %esi,%edx
- addl 12(%rsp),%ecx
- xorl %ebx,%edi
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%edi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %edi,%ecx
- cmpq %r10,%r9
- je .Ldone_ssse3
- movdqa 64(%r11),%xmm6
- movdqa 0(%r11),%xmm9
- movdqu 0(%r9),%xmm0
- movdqu 16(%r9),%xmm1
- movdqu 32(%r9),%xmm2
- movdqu 48(%r9),%xmm3
-.byte 102,15,56,0,198
- addq $64,%r9
- addl 16(%rsp),%ebx
- xorl %eax,%esi
-.byte 102,15,56,0,206
- movl %ecx,%edi
- roll $5,%ecx
- paddd %xmm9,%xmm0
- xorl %ebp,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- movdqa %xmm0,0(%rsp)
- addl 20(%rsp),%eax
- xorl %ebp,%edi
- psubd %xmm9,%xmm0
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- addl 24(%rsp),%ebp
- xorl %edx,%esi
- movl %eax,%edi
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %esi,%ebp
- addl 28(%rsp),%edx
- xorl %ecx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ebx,%edi
- addl %ebp,%edx
- rorl $7,%eax
- addl %edi,%edx
- addl 32(%rsp),%ecx
- xorl %ebx,%esi
-.byte 102,15,56,0,214
- movl %edx,%edi
- roll $5,%edx
- paddd %xmm9,%xmm1
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %esi,%ecx
- movdqa %xmm1,16(%rsp)
- addl 36(%rsp),%ebx
- xorl %eax,%edi
- psubd %xmm9,%xmm1
- movl %ecx,%esi
- roll $5,%ecx
- xorl %ebp,%edi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %edi,%ebx
- addl 40(%rsp),%eax
- xorl %ebp,%esi
- movl %ebx,%edi
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 44(%rsp),%ebp
- xorl %edx,%edi
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%edi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %edi,%ebp
- addl 48(%rsp),%edx
- xorl %ecx,%esi
-.byte 102,15,56,0,222
- movl %ebp,%edi
- roll $5,%ebp
- paddd %xmm9,%xmm2
- xorl %ebx,%esi
- addl %ebp,%edx
- rorl $7,%eax
- addl %esi,%edx
- movdqa %xmm2,32(%rsp)
- addl 52(%rsp),%ecx
- xorl %ebx,%edi
- psubd %xmm9,%xmm2
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%edi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %edi,%ecx
- addl 56(%rsp),%ebx
- xorl %eax,%esi
- movl %ecx,%edi
- roll $5,%ecx
- xorl %ebp,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 60(%rsp),%eax
- xorl %ebp,%edi
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- addl 0(%r8),%eax
- addl 4(%r8),%esi
- addl 8(%r8),%ecx
- addl 12(%r8),%edx
- movl %eax,0(%r8)
- addl 16(%r8),%ebp
- movl %esi,4(%r8)
- movl %esi,%ebx
- movl %ecx,8(%r8)
- movl %edx,12(%r8)
- movl %ebp,16(%r8)
- jmp .Loop_ssse3
-
-.align 16
-.Ldone_ssse3:
- addl 16(%rsp),%ebx
- xorl %eax,%esi
- movl %ecx,%edi
- roll $5,%ecx
- xorl %ebp,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 20(%rsp),%eax
- xorl %ebp,%edi
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- addl 24(%rsp),%ebp
- xorl %edx,%esi
- movl %eax,%edi
- roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %esi,%ebp
- addl 28(%rsp),%edx
- xorl %ecx,%edi
- movl %ebp,%esi
- roll $5,%ebp
- xorl %ebx,%edi
- addl %ebp,%edx
- rorl $7,%eax
- addl %edi,%edx
- addl 32(%rsp),%ecx
- xorl %ebx,%esi
- movl %edx,%edi
- roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %esi,%ecx
- addl 36(%rsp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
- roll $5,%ecx
- xorl %ebp,%edi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %edi,%ebx
- addl 40(%rsp),%eax
- xorl %ebp,%esi
- movl %ebx,%edi
- roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
- addl 44(%rsp),%ebp
- xorl %edx,%edi
- movl %eax,%esi
- roll $5,%eax
- xorl %ecx,%edi
- addl %eax,%ebp
- rorl $7,%ebx
- addl %edi,%ebp
- addl 48(%rsp),%edx
- xorl %ecx,%esi
- movl %ebp,%edi
- roll $5,%ebp
- xorl %ebx,%esi
- addl %ebp,%edx
- rorl $7,%eax
- addl %esi,%edx
- addl 52(%rsp),%ecx
- xorl %ebx,%edi
- movl %edx,%esi
- roll $5,%edx
- xorl %eax,%edi
- addl %edx,%ecx
- rorl $7,%ebp
- addl %edi,%ecx
- addl 56(%rsp),%ebx
- xorl %eax,%esi
- movl %ecx,%edi
- roll $5,%ecx
- xorl %ebp,%esi
- addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- addl 60(%rsp),%eax
- xorl %ebp,%edi
- movl %ebx,%esi
- roll $5,%ebx
- xorl %edx,%edi
- addl %ebx,%eax
- rorl $7,%ecx
- addl %edi,%eax
- addl 0(%r8),%eax
- addl 4(%r8),%esi
- addl 8(%r8),%ecx
- movl %eax,0(%r8)
- addl 12(%r8),%edx
- movl %esi,4(%r8)
- addl 16(%r8),%ebp
- movl %ecx,8(%r8)
- movl %edx,12(%r8)
- movl %ebp,16(%r8)
- leaq 64(%rsp),%rsi
- movq 0(%rsi),%r12
- movq 8(%rsi),%rbp
- movq 16(%rsi),%rbx
- leaq 24(%rsi),%rsp
-.Lepilogue_ssse3:
- .byte 0xf3,0xc3
-.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
-.align 64
-K_XX_XX:
-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
-.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
diff --git a/app/openssl/crypto/sha/asm/sha1-x86_64.pl b/app/openssl/crypto/sha/asm/sha1-x86_64.pl
deleted file mode 100755
index f15c7ec3..00000000
--- a/app/openssl/crypto/sha/asm/sha1-x86_64.pl
+++ /dev/null
@@ -1,1261 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# sha1_block procedure for x86_64.
-#
-# It was brought to my attention that on EM64T compiler-generated code
-# was far behind 32-bit assembler implementation. This is unlike on
-# Opteron where compiler-generated code was only 15% behind 32-bit
-# assembler, which originally made it hard to motivate the effort.
-# There was a suggestion to mechanically translate 32-bit code, but I
-# dismissed it, reasoning that x86_64 offers enough register bank
-# capacity to fully utilize SHA-1 parallelism. Therefore this fresh
-# implementation:-) However! While 64-bit code does perform better
-# on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
-# x86_64 does offer larger *addressable* bank, but out-of-order core
-# reaches for even more registers through dynamic aliasing, and EM64T
-# core must have managed to run-time optimize even 32-bit code just as
-# well as the 64-bit one. Performance improvement is summarized in the
-# following table:
-#
-# gcc 3.4 32-bit asm cycles/byte
-# Opteron +45% +20% 6.8
-# Xeon P4 +65% +0% 9.9
-# Core2 +60% +10% 7.0
-
-# August 2009.
-#
-# The code was revised to minimize code size and to maximize
-# "distance" between instructions producing input to 'lea'
-# instruction and the 'lea' instruction itself, which is essential
-# for Intel Atom core.
-
-# October 2010.
-#
-# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it
-# is to offload message schedule denoted by Wt in NIST specification,
-# or Xupdate in OpenSSL source, to SIMD unit. See sha1-586.pl module
-# for background and implementation details. The only difference from
-# 32-bit code is that 64-bit code doesn't have to spill @X[] elements
-# to free temporary registers.
-
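For reference, the schedule being offloaded is the standard recurrence from the NIST
specification: W[t] = rol1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) for 16 <= t < 80.
A minimal scalar model in Perl (a reader's sketch; the name Wt_ref is hypothetical
and nothing like it appears in this module):

    sub Wt_ref {                    # expand 16 message dwords to the full 80
        my @W = @_;                 # W[0..15], already byte-swapped
        for my $t (16 .. 79) {
            my $x = $W[$t-3] ^ $W[$t-8] ^ $W[$t-14] ^ $W[$t-16];
            $W[$t] = (($x << 1) | ($x >> 31)) & 0xffffffff;   # rotate left by 1
        }
        return @W;
    }

The SIMD paths below compute four consecutive W[t] per xmm register rather than one
at a time; the in-vector dependencies that creates are what the 16_31 and 32_79
update routines have to work around.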
-# April 2011.
-#
-# Add AVX code path. See sha1-586.pl for further information.
-
-######################################################################
-# Current performance is summarized in following table. Numbers are
-# CPU clock cycles spent to process single byte (less is better).
-#
-# x86_64 SSSE3 AVX
-# P4 9.8 -
-# Opteron 6.6 -
-# Core2 6.7 6.1/+10% -
-# Atom 11.0 9.7/+13% -
-# Westmere 7.1 5.6/+27% -
-# Sandy Bridge 7.9 6.3/+25% 5.2/+51%
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
- =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
- $1>=2.19);
-$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
- $1>=2.09);
-$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
- `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
- $1>=10);
-
-open OUT,"| \"$^X\" $xlate $flavour $output";
-*STDOUT=*OUT;
-
-$ctx="%rdi"; # 1st arg
-$inp="%rsi"; # 2nd arg
-$num="%rdx"; # 3rd arg
-
-# reassign arguments in order to produce more compact code
-$ctx="%r8";
-$inp="%r9";
-$num="%r10";
-
-$t0="%eax";
-$t1="%ebx";
-$t2="%ecx";
-@xi=("%edx","%ebp");
-$A="%esi";
-$B="%edi";
-$C="%r11d";
-$D="%r12d";
-$E="%r13d";
-
-@V=($A,$B,$C,$D,$E);
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___ if ($i==0);
- mov `4*$i`($inp),$xi[0]
- bswap $xi[0]
- mov $xi[0],`4*$i`(%rsp)
-___
-$code.=<<___ if ($i<15);
- mov $c,$t0
- mov `4*$j`($inp),$xi[1]
- mov $a,$t2
- xor $d,$t0
- bswap $xi[1]
- rol \$5,$t2
- lea 0x5a827999($xi[0],$e),$e
- and $b,$t0
- mov $xi[1],`4*$j`(%rsp)
- add $t2,$e
- xor $d,$t0
- rol \$30,$b
- add $t0,$e
-___
-$code.=<<___ if ($i>=15);
- mov `4*($j%16)`(%rsp),$xi[1]
- mov $c,$t0
- mov $a,$t2
- xor `4*(($j+2)%16)`(%rsp),$xi[1]
- xor $d,$t0
- rol \$5,$t2
- xor `4*(($j+8)%16)`(%rsp),$xi[1]
- and $b,$t0
- lea 0x5a827999($xi[0],$e),$e
- xor `4*(($j+13)%16)`(%rsp),$xi[1]
- xor $d,$t0
- rol \$1,$xi[1]
- add $t2,$e
- rol \$30,$b
- mov $xi[1],`4*($j%16)`(%rsp)
- add $t0,$e
-___
-unshift(@xi,pop(@xi));
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-my $K=($i<40)?0x6ed9eba1:0xca62c1d6;
-$code.=<<___ if ($i<79);
- mov `4*($j%16)`(%rsp),$xi[1]
- mov $c,$t0
- mov $a,$t2
- xor `4*(($j+2)%16)`(%rsp),$xi[1]
- xor $b,$t0
- rol \$5,$t2
- lea $K($xi[0],$e),$e
- xor `4*(($j+8)%16)`(%rsp),$xi[1]
- xor $d,$t0
- add $t2,$e
- xor `4*(($j+13)%16)`(%rsp),$xi[1]
- rol \$30,$b
- add $t0,$e
- rol \$1,$xi[1]
-___
-$code.=<<___ if ($i<76);
- mov $xi[1],`4*($j%16)`(%rsp)
-___
-$code.=<<___ if ($i==79);
- mov $c,$t0
- mov $a,$t2
- xor $b,$t0
- lea $K($xi[0],$e),$e
- rol \$5,$t2
- xor $d,$t0
- add $t2,$e
- rol \$30,$b
- add $t0,$e
-___
-unshift(@xi,pop(@xi));
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $j=$i+1;
-$code.=<<___;
- mov `4*($j%16)`(%rsp),$xi[1]
- mov $c,$t0
- mov $c,$t1
- xor `4*(($j+2)%16)`(%rsp),$xi[1]
- and $d,$t0
- mov $a,$t2
- xor `4*(($j+8)%16)`(%rsp),$xi[1]
- xor $d,$t1
- lea 0x8f1bbcdc($xi[0],$e),$e
- rol \$5,$t2
- xor `4*(($j+13)%16)`(%rsp),$xi[1]
- add $t0,$e
- and $b,$t1
- rol \$1,$xi[1]
- add $t1,$e
- rol \$30,$b
- mov $xi[1],`4*($j%16)`(%rsp)
- add $t2,$e
-___
-unshift(@xi,pop(@xi));
-}
-
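A note on the 40..59 rounds above: BODY_40_59 realizes Maj(b,c,d) = (b&c)|(b&d)|(c&d)
as the sum (c&d) + (b&(c^d)). The two terms are bitwise disjoint (a set bit in c&d
needs c == d there, while a set bit in b&(c^d) needs c != d), so 'add' can stand in
for 'or' and folds into the running additions to $e. A quick exhaustive check, as a
hedged stand-alone Perl snippet rather than anything emitted by this file:

    for my $i (0 .. 7) {            # all (b,c,d) one-bit combinations
        my ($b, $c, $d) = map { ($i >> $_) & 1 } (2, 1, 0);
        my $maj = ($b & $c) | ($b & $d) | ($c & $d);
        die "bad split" unless (($c & $d) + ($b & ($c ^ $d))) == $maj;
    }

BODY_00_19 uses a related identity, Ch(b,c,d) = (b&c)|(~b&d) = (b&(c^d))^d, which
needs only a single temporary register.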
-$code.=<<___;
-.text
-.extern OPENSSL_ia32cap_P
-
-.globl sha1_block_data_order
-.type sha1_block_data_order,\@function,3
-.align 16
-sha1_block_data_order:
- mov OPENSSL_ia32cap_P+0(%rip),%r9d
- mov OPENSSL_ia32cap_P+4(%rip),%r8d
- test \$`1<<9`,%r8d # check SSSE3 bit
- jz .Lialu
-___
-$code.=<<___ if ($avx);
- and \$`1<<28`,%r8d # mask AVX bit
- and \$`1<<30`,%r9d # mask "Intel CPU" bit
- or %r9d,%r8d
- cmp \$`1<<28|1<<30`,%r8d
- je _avx_shortcut
-___
-$code.=<<___;
- jmp _ssse3_shortcut
-
-.align 16
-.Lialu:
- push %rbx
- push %rbp
- push %r12
- push %r13
- mov %rsp,%r11
- mov %rdi,$ctx # reassigned argument
- sub \$`8+16*4`,%rsp
- mov %rsi,$inp # reassigned argument
- and \$-64,%rsp
- mov %rdx,$num # reassigned argument
- mov %r11,`16*4`(%rsp)
-.Lprologue:
-
- mov 0($ctx),$A
- mov 4($ctx),$B
- mov 8($ctx),$C
- mov 12($ctx),$D
- mov 16($ctx),$E
- jmp .Lloop
-
-.align 16
-.Lloop:
-___
-for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- add 0($ctx),$A
- add 4($ctx),$B
- add 8($ctx),$C
- add 12($ctx),$D
- add 16($ctx),$E
- mov $A,0($ctx)
- mov $B,4($ctx)
- mov $C,8($ctx)
- mov $D,12($ctx)
- mov $E,16($ctx)
-
- sub \$1,$num
- lea `16*4`($inp),$inp
- jnz .Lloop
-
- mov `16*4`(%rsp),%rsi
- mov (%rsi),%r13
- mov 8(%rsi),%r12
- mov 16(%rsi),%rbp
- mov 24(%rsi),%rbx
- lea 32(%rsi),%rsp
-.Lepilogue:
- ret
-.size sha1_block_data_order,.-sha1_block_data_order
-___
-{{{
-my $Xi=4;
-my @X=map("%xmm$_",(4..7,0..3));
-my @Tx=map("%xmm$_",(8..10));
-my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
-my @T=("%esi","%edi");
-my $j=0;
-my $K_XX_XX="%r11";
-
-my $_rol=sub { &rol(@_) };
-my $_ror=sub { &ror(@_) };
-
-$code.=<<___;
-.type sha1_block_data_order_ssse3,\@function,3
-.align 16
-sha1_block_data_order_ssse3:
-_ssse3_shortcut:
- push %rbx
- push %rbp
- push %r12
- lea `-64-($win64?5*16:0)`(%rsp),%rsp
-___
-$code.=<<___ if ($win64);
- movaps %xmm6,64+0(%rsp)
- movaps %xmm7,64+16(%rsp)
- movaps %xmm8,64+32(%rsp)
- movaps %xmm9,64+48(%rsp)
- movaps %xmm10,64+64(%rsp)
-.Lprologue_ssse3:
-___
-$code.=<<___;
- mov %rdi,$ctx # reassigned argument
- mov %rsi,$inp # reassigned argument
- mov %rdx,$num # reassigned argument
-
- shl \$6,$num
- add $inp,$num
- lea K_XX_XX(%rip),$K_XX_XX
-
- mov 0($ctx),$A # load context
- mov 4($ctx),$B
- mov 8($ctx),$C
- mov 12($ctx),$D
- mov $B,@T[0] # magic seed
- mov 16($ctx),$E
-
- movdqa 64($K_XX_XX),@X[2] # pbswap mask
- movdqa 0($K_XX_XX),@Tx[1] # K_00_19
- movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
- movdqu 16($inp),@X[-3&7]
- movdqu 32($inp),@X[-2&7]
- movdqu 48($inp),@X[-1&7]
- pshufb @X[2],@X[-4&7] # byte swap
- add \$64,$inp
- pshufb @X[2],@X[-3&7]
- pshufb @X[2],@X[-2&7]
- pshufb @X[2],@X[-1&7]
- paddd @Tx[1],@X[-4&7] # add K_00_19
- paddd @Tx[1],@X[-3&7]
- paddd @Tx[1],@X[-2&7]
- movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU
- psubd @Tx[1],@X[-4&7] # restore X[]
- movdqa @X[-3&7],16(%rsp)
- psubd @Tx[1],@X[-3&7]
- movdqa @X[-2&7],32(%rsp)
- psubd @Tx[1],@X[-2&7]
- jmp .Loop_ssse3
-___
-
-sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
-{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
- my $arg = pop;
- $arg = "\$$arg" if ($arg*1 eq $arg);
- $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
-}
-
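The AUTOLOAD thunk above turns any call to an undefined sub into one emitted
instruction: the sub name becomes the mnemonic, a bare numeric argument gains the
'$' immediate prefix, and the argument list is reversed into AT&T source,destination
order (so calls are written destination first, Intel-style). For example, assuming
the register strings shown:

    &movdqa ("%xmm1", "%xmm0");     # appends "\tmovdqa\t%xmm0,%xmm1\n" to $code
    &psrld  ("%xmm1", 31);          # appends "\tpsrld\t\$31,%xmm1\n"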
-sub Xupdate_ssse3_16_31()		# recall that $Xi starts with 4
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
- my ($a,$b,$c,$d,$e);
-
- &movdqa (@X[0],@X[-3&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@Tx[0],@X[-1&7]);
- &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &paddd (@Tx[1],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- &psrldq (@Tx[0],4); # "X[-3]", 3 dwords
- eval(shift(@insns));
- eval(shift(@insns));
- &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- &movdqa (@Tx[2],@X[0]);
- &movdqa (@Tx[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword
- &paddd (@X[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &psrld (@Tx[0],31);
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@Tx[1],@Tx[2]);
- eval(shift(@insns));
- eval(shift(@insns));
-
- &psrld (@Tx[2],30);
- &por (@X[0],@Tx[0]); # "X[0]"<<<=1
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pslld (@Tx[1],2);
- &pxor (@X[0],@Tx[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
- eval(shift(@insns));
- eval(shift(@insns));
-
- &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
- push(@Tx,shift(@Tx));
-}
-
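In scalar terms, each Xupdate_ssse3_16_31 call produces four W[t] at once, but lane 3
needs W[t] itself as its "X[-3]" input, and that value is only born in lane 0 of the
same vector. The code therefore computes the xor with that lane zeroed (psrldq $4
drops it), rotates, and patches lane 3 afterwards: because rotation distributes over
xor, the missing rol1(W[t]) contribution equals rol2 of lane 0's pre-rotation value,
which pslldq $12 moves into position. A 4-wide reference model (Xupdate4_ref is a
hypothetical name, not emitted code):

    sub Xupdate4_ref {              # one 16_31 step; $t a multiple of 4, 16 <= $t < 32
        my ($W, $t) = @_;           # $W: array ref holding W[0 .. $t-1]
        my $rol = sub { (($_[0] << $_[1]) | ($_[0] >> (32 - $_[1]))) & 0xffffffff };
        my @y = map { $W->[$t+$_-16] ^ $W->[$t+$_-14] ^ $W->[$t+$_-8]
                      ^ ($_ < 3 ? $W->[$t+$_-3] : 0) } (0 .. 3);  # lane 3: W[t] unknown
        my @x = map { $rol->($y[$_], 1) } (0 .. 3);               # "X[0]"<<<=1
        $x[3] ^= $rol->($y[0], 2);  # patch: rol1(W[t]) == rol2(y[0])
        @{$W}[$t .. $t+3] = @x;
    }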
-sub Xupdate_ssse3_32_79()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
- my ($a,$b,$c,$d,$e);
-
- &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8);
- eval(shift(@insns)); # body_20_39
- &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
- &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
- eval(shift(@insns));
- eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
- if ($Xi%5) {
- &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
- } else { # ... or load next one
- &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
- }
- &paddd (@Tx[1],@X[-1&7]);
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &movdqa (@Tx[0],@X[0]);
- &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &pslld (@X[0],2);
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &psrld (@Tx[0],30);
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &por (@X[0],@Tx[0]); # "X[0]"<<<=2
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &movdqa (@Tx[1],@X[0]) if ($Xi<19);
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
- push(@Tx,shift(@Tx));
-}
-
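Xupdate_ssse3_32_79 relies on an equivalent form of the recurrence: expanding each
right-hand term by the recurrence itself makes six inner terms appear twice and
cancel, leaving W[t] = rol2(W[t-6] ^ W[t-16] ^ W[t-28] ^ W[t-32]). With every input
at least six positions back, lane 3 of a 4-wide step only reaches into the previous
vector (the palignr-composed "X[-6]" above), so no lane patching is needed. The
matching reference model (Xupdate32_ref is a hypothetical name):

    sub Xupdate32_ref {             # one 32_79 step; $t a multiple of 4, 32 <= $t < 80
        my ($W, $t) = @_;
        my $rol = sub { (($_[0] << $_[1]) | ($_[0] >> (32 - $_[1]))) & 0xffffffff };
        @{$W}[$t .. $t+3] = map {
            $rol->($W->[$t+$_-32] ^ $W->[$t+$_-28] ^ $W->[$t+$_-16] ^ $W->[$t+$_-6], 2)
        } (0 .. 3);
    }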
-sub Xuplast_ssse3_80()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- &paddd (@Tx[1],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
-
- foreach (@insns) { eval; } # remaining instructions
-
- &cmp ($inp,$num);
- &je (".Ldone_ssse3");
-
- unshift(@Tx,pop(@Tx));
-
- &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask
- &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19
- &movdqu (@X[-4&7],"0($inp)"); # load input
- &movdqu (@X[-3&7],"16($inp)");
- &movdqu (@X[-2&7],"32($inp)");
- &movdqu (@X[-1&7],"48($inp)");
- &pshufb (@X[-4&7],@X[2]); # byte swap
- &add ($inp,64);
-
- $Xi=0;
-}
-
-sub Xloop_ssse3()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &pshufb (@X[($Xi-3)&7],@X[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &paddd (@X[($Xi-4)&7],@Tx[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- &psubd (@X[($Xi-4)&7],@Tx[1]);
-
- foreach (@insns) { eval; }
- $Xi++;
-}
-
-sub Xtail_ssse3()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- foreach (@insns) { eval; }
-}
-
-sub body_00_19 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer
- '&xor ($c,$d);',
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&and (@T[0],$c);', # ($b&($c^$d))
- '&xor ($c,$d);', # restore $c
- '&xor (@T[0],$d);',
- '&add ($e,$a);',
- '&$_ror ($b,$j?7:2);', # $b>>>2
- '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
-
-sub body_20_39 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
- '&xor (@T[0],$d);', # ($b^$d)
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&xor (@T[0],$c);', # ($b^$d^$c)
- '&add ($e,$a);',
- '&$_ror ($b,7);', # $b>>>2
- '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
-
-sub body_40_59 () {
- (
- '($a,$b,$c,$d,$e)=@V;'.
- '&mov (@T[1],$c);',
- '&xor ($c,$d);',
- '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
- '&and (@T[1],$d);',
- '&and (@T[0],$c);', # ($b&($c^$d))
- '&$_ror ($b,7);', # $b>>>2
- '&add ($e,@T[1]);',
- '&mov (@T[1],$a);', # $b in next round
- '&$_rol ($a,5);',
- '&add ($e,@T[0]);',
- '&xor ($c,$d);', # restore $c
- '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
- );
-}
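Spread across their interleaved instruction lists, the three closures above each
implement one standard SHA-1 round for their range, with round constants matching
the K_XX_XX table emitted at the end of the file. As a compact statement of what one
emitted round computes (round_ref is a reader's sketch, not the generator; it takes
raw W[t] and adds K explicitly, whereas the real code stores W[t]+K on the stack):

    my @K = (0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6);
    sub round_ref {                 # one round on state ($a,$b,$c,$d,$e)
        my ($t, $a, $b, $c, $d, $e, $w) = @_;
        my $rol = sub { (($_[0] << $_[1]) | ($_[0] >> (32 - $_[1]))) & 0xffffffff };
        my $f = $t < 20 ? (($b & ($c ^ $d)) ^ $d)         # Ch, as in body_00_19
              : $t < 40 ? ($b ^ $c ^ $d)                  # Parity, body_20_39
              : $t < 60 ? (($c & $d) + ($b & ($c ^ $d)))  # Maj split, body_40_59
              :           ($b ^ $c ^ $d);                 # Parity again
        $e = ($e + $rol->($a, 5) + $f + $w + $K[int($t/20)]) & 0xffffffff;
        return ($e, $a, $rol->($b, 30), $c, $d);          # rotated state
    }

In the generator the same update is deliberately smeared over many instructions so
that the Xupdate SIMD work can be slotted between them via eval(shift(@insns)).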
-$code.=<<___;
-.align 16
-.Loop_ssse3:
-___
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_16_31(\&body_00_19);
- &Xupdate_ssse3_32_79(\&body_00_19);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_40_59);
- &Xupdate_ssse3_32_79(\&body_20_39);
- &Xuplast_ssse3_80(\&body_20_39); # can jump to "done"
-
- $saved_j=$j; @saved_V=@V;
-
- &Xloop_ssse3(\&body_20_39);
- &Xloop_ssse3(\&body_20_39);
- &Xloop_ssse3(\&body_20_39);
-
-$code.=<<___;
- add 0($ctx),$A # update context
- add 4($ctx),@T[0]
- add 8($ctx),$C
- add 12($ctx),$D
- mov $A,0($ctx)
- add 16($ctx),$E
- mov @T[0],4($ctx)
- mov @T[0],$B # magic seed
- mov $C,8($ctx)
- mov $D,12($ctx)
- mov $E,16($ctx)
- jmp .Loop_ssse3
-
-.align 16
-.Ldone_ssse3:
-___
- $j=$saved_j; @V=@saved_V;
-
- &Xtail_ssse3(\&body_20_39);
- &Xtail_ssse3(\&body_20_39);
- &Xtail_ssse3(\&body_20_39);
-
-$code.=<<___;
- add 0($ctx),$A # update context
- add 4($ctx),@T[0]
- add 8($ctx),$C
- mov $A,0($ctx)
- add 12($ctx),$D
- mov @T[0],4($ctx)
- add 16($ctx),$E
- mov $C,8($ctx)
- mov $D,12($ctx)
- mov $E,16($ctx)
-___
-$code.=<<___ if ($win64);
- movaps 64+0(%rsp),%xmm6
- movaps 64+16(%rsp),%xmm7
- movaps 64+32(%rsp),%xmm8
- movaps 64+48(%rsp),%xmm9
- movaps 64+64(%rsp),%xmm10
-___
-$code.=<<___;
- lea `64+($win64?5*16:0)`(%rsp),%rsi
- mov 0(%rsi),%r12
- mov 8(%rsi),%rbp
- mov 16(%rsi),%rbx
- lea 24(%rsi),%rsp
-.Lepilogue_ssse3:
- ret
-.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
-___
-
-if ($avx) {
-my $Xi=4;
-my @X=map("%xmm$_",(4..7,0..3));
-my @Tx=map("%xmm$_",(8..10));
-my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
-my @T=("%esi","%edi");
-my $j=0;
-my $K_XX_XX="%r11";
-
-my $_rol=sub { &shld(@_[0],@_) };
-my $_ror=sub { &shrd(@_[0],@_) };
-
-$code.=<<___;
-.type sha1_block_data_order_avx,\@function,3
-.align 16
-sha1_block_data_order_avx:
-_avx_shortcut:
- push %rbx
- push %rbp
- push %r12
- lea `-64-($win64?5*16:0)`(%rsp),%rsp
-___
-$code.=<<___ if ($win64);
- movaps %xmm6,64+0(%rsp)
- movaps %xmm7,64+16(%rsp)
- movaps %xmm8,64+32(%rsp)
- movaps %xmm9,64+48(%rsp)
- movaps %xmm10,64+64(%rsp)
-.Lprologue_avx:
-___
-$code.=<<___;
- mov %rdi,$ctx # reassigned argument
- mov %rsi,$inp # reassigned argument
- mov %rdx,$num # reassigned argument
- vzeroupper
-
- shl \$6,$num
- add $inp,$num
- lea K_XX_XX(%rip),$K_XX_XX
-
- mov 0($ctx),$A # load context
- mov 4($ctx),$B
- mov 8($ctx),$C
- mov 12($ctx),$D
- mov $B,@T[0] # magic seed
- mov 16($ctx),$E
-
- vmovdqa 64($K_XX_XX),@X[2] # pbswap mask
- vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19
- vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
- vmovdqu 16($inp),@X[-3&7]
- vmovdqu 32($inp),@X[-2&7]
- vmovdqu 48($inp),@X[-1&7]
- vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap
- add \$64,$inp
- vpshufb @X[2],@X[-3&7],@X[-3&7]
- vpshufb @X[2],@X[-2&7],@X[-2&7]
- vpshufb @X[2],@X[-1&7],@X[-1&7]
- vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19
- vpaddd @Tx[1],@X[-3&7],@X[1]
- vpaddd @Tx[1],@X[-2&7],@X[2]
- vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU
- vmovdqa @X[1],16(%rsp)
- vmovdqa @X[2],32(%rsp)
- jmp .Loop_avx
-___
-
-sub Xupdate_avx_16_31()		# recall that $Xi starts with 4
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords
- eval(shift(@insns));
- eval(shift(@insns));
- &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpsrld (@Tx[0],@X[0],31);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword
- &vpaddd (@X[0],@X[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpsrld (@Tx[1],@Tx[2],30);
- &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpslld (@Tx[2],@Tx[2],2);
- &vpxor (@X[0],@X[0],@Tx[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
- eval(shift(@insns));
- eval(shift(@insns));
-
-
- foreach (@insns) { eval; } # remaining instructions [if any]
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
- push(@Tx,shift(@Tx));
-}
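-# Reference note, restating the comments above: rounds 16..31 evaluate the
-# SHA-1 schedule W[i] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) <<< 1 four
-# elements at a time. The W[i-3] operand of the last lane is produced by
-# this very vector, so that lane is computed without it first; the
-# vpslldq/vpsrld/vpslld/vpxor tail then extracts the pre-rotation lane-0
-# word and XORs it back in rotated by 2 (its own <<<1 plus the <<<1 the
-# lane has already received).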
-
-sub Xupdate_avx_32_79()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
- my ($a,$b,$c,$d,$e);
-
- &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]"
- &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
- eval(shift(@insns));
- eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
- if ($Xi%5) {
- &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
- } else { # ... or load next one
- &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
- }
- &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]"
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
-
- &vpsrld (@Tx[0],@X[0],30);
- &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpslld (@X[0],@X[0],2);
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # ror
- eval(shift(@insns));
-
- &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2
- eval(shift(@insns)); # body_20_39
- eval(shift(@insns));
- &vmovdqa (@Tx[1],@X[0]) if ($Xi<19);
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns)); # rol
- eval(shift(@insns));
-
- foreach (@insns) { eval; } # remaining instructions
-
- $Xi++; push(@X,shift(@X)); # "rotate" X[]
- push(@Tx,shift(@Tx));
-}
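-# Rounds 32..79 use the equivalent recurrence obtained by applying the one
-# above to itself: W[i] = (W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32]) <<< 2.
-# The closest operand, W[i-6], is older than the vector width, so a full
-# four-lane update needs only vpalignr/vpxor and no per-lane fixup.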
-
-sub Xuplast_avx_80()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
-
- &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
-
- foreach (@insns) { eval; } # remaining instructions
-
- &cmp ($inp,$num);
- &je (".Ldone_avx");
-
- unshift(@Tx,pop(@Tx));
-
- &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask
- &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19
- &vmovdqu(@X[-4&7],"0($inp)"); # load input
- &vmovdqu(@X[-3&7],"16($inp)");
- &vmovdqu(@X[-2&7],"32($inp)");
- &vmovdqu(@X[-1&7],"48($inp)");
- &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap
- &add ($inp,64);
-
- $Xi=0;
-}
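-# Xuplast_avx_80 stores the last X[]+K quadruple and, unless the input is
-# exhausted (cmp $inp,$num / je .Ldone_avx), begins loading and
-# byte-swapping the next 64-byte block; the three Xloop_avx calls that
-# follow fold the remaining vpshufb/vpaddd set-up into the final rounds so
-# the next block's loads overlap useful work.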
-
-sub Xloop_avx()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- eval(shift(@insns));
- eval(shift(@insns));
- &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU
- eval(shift(@insns));
- eval(shift(@insns));
-
- foreach (@insns) { eval; }
- $Xi++;
-}
-
-sub Xtail_avx()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
- my ($a,$b,$c,$d,$e);
-
- foreach (@insns) { eval; }
-}
-
-$code.=<<___;
-.align 16
-.Loop_avx:
-___
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_16_31(\&body_00_19);
- &Xupdate_avx_32_79(\&body_00_19);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_40_59);
- &Xupdate_avx_32_79(\&body_20_39);
- &Xuplast_avx_80(\&body_20_39); # can jump to "done"
-
- $saved_j=$j; @saved_V=@V;
-
- &Xloop_avx(\&body_20_39);
- &Xloop_avx(\&body_20_39);
- &Xloop_avx(\&body_20_39);
-
-$code.=<<___;
- add 0($ctx),$A # update context
- add 4($ctx),@T[0]
- add 8($ctx),$C
- add 12($ctx),$D
- mov $A,0($ctx)
- add 16($ctx),$E
- mov @T[0],4($ctx)
- mov @T[0],$B # magic seed
- mov $C,8($ctx)
- mov $D,12($ctx)
- mov $E,16($ctx)
- jmp .Loop_avx
-
-.align 16
-.Ldone_avx:
-___
- $j=$saved_j; @V=@saved_V;
-
- &Xtail_avx(\&body_20_39);
- &Xtail_avx(\&body_20_39);
- &Xtail_avx(\&body_20_39);
-
-$code.=<<___;
- vzeroupper
-
- add 0($ctx),$A # update context
- add 4($ctx),@T[0]
- add 8($ctx),$C
- mov $A,0($ctx)
- add 12($ctx),$D
- mov @T[0],4($ctx)
- add 16($ctx),$E
- mov $C,8($ctx)
- mov $D,12($ctx)
- mov $E,16($ctx)
-___
-$code.=<<___ if ($win64);
- movaps 64+0(%rsp),%xmm6
- movaps 64+16(%rsp),%xmm7
- movaps 64+32(%rsp),%xmm8
- movaps 64+48(%rsp),%xmm9
- movaps 64+64(%rsp),%xmm10
-___
-$code.=<<___;
- lea `64+($win64?5*16:0)`(%rsp),%rsi
- mov 0(%rsi),%r12
- mov 8(%rsi),%rbp
- mov 16(%rsi),%rbx
- lea 24(%rsi),%rsp
-.Lepilogue_avx:
- ret
-.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
-___
-}
-$code.=<<___;
-.align 64
-K_XX_XX:
-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19
-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39
-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59
-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79
-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask
-___
-}}}
-$code.=<<___;
-.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
-.align 64
-___
-
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type se_handler,\@abi-omnipotent
-.align 16
-se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- lea .Lprologue(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lprologue
- jb .Lcommon_seh_tail
-
- mov 152($context),%rax # pull context->Rsp
-
- lea .Lepilogue(%rip),%r10
- cmp %r10,%rbx # context->Rip>=.Lepilogue
- jae .Lcommon_seh_tail
-
- mov `16*4`(%rax),%rax # pull saved stack pointer
- lea 32(%rax),%rax
-
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
- mov -24(%rax),%r12
- mov -32(%rax),%r13
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
-
- jmp .Lcommon_seh_tail
-.size se_handler,.-se_handler
-
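-# ssse3_handler serves both the SSSE3 and AVX routines: it locates the
-# prologue/epilogue labels through HandlerData[] rather than hard-coded
-# addresses, and additionally copies the five saved %xmm6-%xmm10 (ten
-# quadwords via rep movsq) back into the CONTEXT record before restoring
-# %rbx, %rbp and %r12.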
-.type ssse3_handler,\@abi-omnipotent
-.align 16
-ssse3_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<prologue label
- jb .Lcommon_seh_tail
-
- mov 152($context),%rax # pull context->Rsp
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lcommon_seh_tail
-
- lea 64(%rax),%rsi
- lea 512($context),%rdi # &context.Xmm6
- mov \$10,%ecx
- .long 0xa548f3fc # cld; rep movsq
- lea `24+64+5*16`(%rax),%rax # adjust stack pointer
-
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
- mov -24(%rax),%r12
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
-
-.Lcommon_seh_tail:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$154,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size ssse3_handler,.-ssse3_handler
-
-.section .pdata
-.align 4
- .rva .LSEH_begin_sha1_block_data_order
- .rva .LSEH_end_sha1_block_data_order
- .rva .LSEH_info_sha1_block_data_order
- .rva .LSEH_begin_sha1_block_data_order_ssse3
- .rva .LSEH_end_sha1_block_data_order_ssse3
- .rva .LSEH_info_sha1_block_data_order_ssse3
-___
-$code.=<<___ if ($avx);
- .rva .LSEH_begin_sha1_block_data_order_avx
- .rva .LSEH_end_sha1_block_data_order_avx
- .rva .LSEH_info_sha1_block_data_order_avx
-___
-$code.=<<___;
-.section .xdata
-.align 8
-.LSEH_info_sha1_block_data_order:
- .byte 9,0,0,0
- .rva se_handler
-.LSEH_info_sha1_block_data_order_ssse3:
- .byte 9,0,0,0
- .rva ssse3_handler
- .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[]
-___
-$code.=<<___ if ($avx);
-.LSEH_info_sha1_block_data_order_avx:
- .byte 9,0,0,0
- .rva ssse3_handler
- .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[]
-___
-}
-
-####################################################################
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha256-586.S b/app/openssl/crypto/sha/asm/sha256-586.S
deleted file mode 100644
index 77a89514..00000000
--- a/app/openssl/crypto/sha/asm/sha256-586.S
+++ /dev/null
@@ -1,258 +0,0 @@
-.file "sha512-586.s"
-.text
-.globl sha256_block_data_order
-.type sha256_block_data_order,@function
-.align 16
-sha256_block_data_order:
-.L_sha256_block_data_order_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%esi
- movl 24(%esp),%edi
- movl 28(%esp),%eax
- movl %esp,%ebx
- call .L000pic_point
-.L000pic_point:
- popl %ebp
- leal .L001K256-.L000pic_point(%ebp),%ebp
- subl $16,%esp
- andl $-64,%esp
- shll $6,%eax
- addl %edi,%eax
- movl %esi,(%esp)
- movl %edi,4(%esp)
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
-.align 16
-.L002loop:
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- movl 12(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 16(%edi),%eax
- movl 20(%edi),%ebx
- movl 24(%edi),%ecx
- movl 28(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 32(%edi),%eax
- movl 36(%edi),%ebx
- movl 40(%edi),%ecx
- movl 44(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 48(%edi),%eax
- movl 52(%edi),%ebx
- movl 56(%edi),%ecx
- movl 60(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- addl $64,%edi
- subl $32,%esp
- movl %edi,100(%esp)
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,4(%esp)
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%ebx
- movl 24(%esi),%ecx
- movl 28(%esi),%edi
- movl %ebx,20(%esp)
- movl %ecx,24(%esp)
- movl %edi,28(%esp)
-.align 16
-.L00300_15:
- movl 92(%esp),%ebx
- movl %edx,%ecx
- rorl $14,%ecx
- movl 20(%esp),%esi
- xorl %edx,%ecx
- rorl $5,%ecx
- xorl %edx,%ecx
- rorl $6,%ecx
- movl 24(%esp),%edi
- addl %ecx,%ebx
- xorl %edi,%esi
- movl %edx,16(%esp)
- movl %eax,%ecx
- andl %edx,%esi
- movl 12(%esp),%edx
- xorl %edi,%esi
- movl %eax,%edi
- addl %esi,%ebx
- rorl $9,%ecx
- addl 28(%esp),%ebx
- xorl %eax,%ecx
- rorl $11,%ecx
- movl 4(%esp),%esi
- xorl %eax,%ecx
- rorl $2,%ecx
- addl %ebx,%edx
- movl 8(%esp),%edi
- addl %ecx,%ebx
- movl %eax,(%esp)
- movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
- andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- addl %esi,%edx
- addl %esi,%eax
- cmpl $3248222580,%esi
- jne .L00300_15
- movl 152(%esp),%ebx
-.align 16
-.L00416_63:
- movl %ebx,%esi
- movl 100(%esp),%ecx
- rorl $11,%esi
- movl %ecx,%edi
- xorl %ebx,%esi
- rorl $7,%esi
- shrl $3,%ebx
- rorl $2,%edi
- xorl %esi,%ebx
- xorl %ecx,%edi
- rorl $17,%edi
- shrl $10,%ecx
- addl 156(%esp),%ebx
- xorl %ecx,%edi
- addl 120(%esp),%ebx
- movl %edx,%ecx
- addl %edi,%ebx
- rorl $14,%ecx
- movl 20(%esp),%esi
- xorl %edx,%ecx
- rorl $5,%ecx
- movl %ebx,92(%esp)
- xorl %edx,%ecx
- rorl $6,%ecx
- movl 24(%esp),%edi
- addl %ecx,%ebx
- xorl %edi,%esi
- movl %edx,16(%esp)
- movl %eax,%ecx
- andl %edx,%esi
- movl 12(%esp),%edx
- xorl %edi,%esi
- movl %eax,%edi
- addl %esi,%ebx
- rorl $9,%ecx
- addl 28(%esp),%ebx
- xorl %eax,%ecx
- rorl $11,%ecx
- movl 4(%esp),%esi
- xorl %eax,%ecx
- rorl $2,%ecx
- addl %ebx,%edx
- movl 8(%esp),%edi
- addl %ecx,%ebx
- movl %eax,(%esp)
- movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
- andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- movl 152(%esp),%ebx
- addl %esi,%edx
- addl %esi,%eax
- cmpl $3329325298,%esi
- jne .L00416_63
- movl 352(%esp),%esi
- movl 4(%esp),%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edi
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%ecx
- addl 12(%esi),%edi
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %ecx,8(%esi)
- movl %edi,12(%esi)
- movl 20(%esp),%eax
- movl 24(%esp),%ebx
- movl 28(%esp),%ecx
- movl 356(%esp),%edi
- addl 16(%esi),%edx
- addl 20(%esi),%eax
- addl 24(%esi),%ebx
- addl 28(%esi),%ecx
- movl %edx,16(%esi)
- movl %eax,20(%esi)
- movl %ebx,24(%esi)
- movl %ecx,28(%esi)
- addl $352,%esp
- subl $256,%ebp
- cmpl 8(%esp),%edi
- jb .L002loop
- movl 12(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 64
-.L001K256:
-.long 1116352408,1899447441,3049323471,3921009573
-.long 961987163,1508970993,2453635748,2870763221
-.long 3624381080,310598401,607225278,1426881987
-.long 1925078388,2162078206,2614888103,3248222580
-.long 3835390401,4022224774,264347078,604807628
-.long 770255983,1249150122,1555081692,1996064986
-.long 2554220882,2821834349,2952996808,3210313671
-.long 3336571891,3584528711,113926993,338241895
-.long 666307205,773529912,1294757372,1396182291
-.long 1695183700,1986661051,2177026350,2456956037
-.long 2730485921,2820302411,3259730800,3345764771
-.long 3516065817,3600352804,4094571909,275423344
-.long 430227734,506948616,659060556,883997877
-.long 958139571,1322822218,1537002063,1747873779
-.long 1955562222,2024104815,2227730452,2361852424
-.long 2428436474,2756734187,3204031479,3329325298
-.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
-.byte 62,0
diff --git a/app/openssl/crypto/sha/asm/sha256-586.pl b/app/openssl/crypto/sha/asm/sha256-586.pl
deleted file mode 100644
index 52a7c7f8..00000000
--- a/app/openssl/crypto/sha/asm/sha256-586.pl
+++ /dev/null
@@ -1,249 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# SHA256 block transform for x86. September 2007.
-#
-# Performance in clock cycles per processed byte (less is better):
-#
-# Pentium PIII P4 AMD K8 Core2
-# gcc 46 36 41 27 26
-# icc 57 33 38 25 23
-# x86 asm 40 30 33 20 18
-# x86_64 asm(*) - - 21 16 16
-#
-# (*) x86_64 assembler performance is presented for reference
-# purposes.
-#
-# Performance improvement over compiler-generated code varies from
-# 10% to 40% [see above]. Not very impressive on some µ-archs, but
-# it's 5 times smaller and optimizes the number of writes.
-
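-# (Interface note, restated from the code below: sha256_block_data_order()
-# takes the eight-word state, the input pointer and the number of 64-byte
-# blocks to process; "num" is shifted left by 6 to form the end-of-input
-# pointer, so padding and partial blocks are the caller's responsibility.)
-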
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
-
-$A="eax";
-$E="edx";
-$T="ebx";
-$Aoff=&DWP(0,"esp");
-$Boff=&DWP(4,"esp");
-$Coff=&DWP(8,"esp");
-$Doff=&DWP(12,"esp");
-$Eoff=&DWP(16,"esp");
-$Foff=&DWP(20,"esp");
-$Goff=&DWP(24,"esp");
-$Hoff=&DWP(28,"esp");
-$Xoff=&DWP(32,"esp");
-$K256="ebp";
-
-sub BODY_00_15() {
- my $in_16_63=shift;
-
- &mov ("ecx",$E);
- &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
- &ror ("ecx",25-11);
- &mov ("esi",$Foff);
- &xor ("ecx",$E);
- &ror ("ecx",11-6);
- &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
- &xor ("ecx",$E);
- &ror ("ecx",6); # Sigma1(e)
- &mov ("edi",$Goff);
- &add ($T,"ecx"); # T += Sigma1(e)
-
- &xor ("esi","edi");
- &mov ($Eoff,$E); # modulo-scheduled
- &mov ("ecx",$A);
- &and ("esi",$E);
- &mov ($E,$Doff); # e becomes d, which is e in next iteration
- &xor ("esi","edi"); # Ch(e,f,g)
- &mov ("edi",$A);
- &add ($T,"esi"); # T += Ch(e,f,g)
-
- &ror ("ecx",22-13);
- &add ($T,$Hoff); # T += h
- &xor ("ecx",$A);
- &ror ("ecx",13-2);
- &mov ("esi",$Boff);
- &xor ("ecx",$A);
- &ror ("ecx",2); # Sigma0(a)
- &add ($E,$T); # d += T
- &mov ("edi",$Coff);
-
- &add ($T,"ecx"); # T += Sigma0(a)
- &mov ($Aoff,$A); # modulo-scheduled
-
- &mov ("ecx",$A);
- &sub ("esp",4);
- &or ($A,"esi"); # a becomes h, which is a in next iteration
- &and ("ecx","esi");
- &and ($A,"edi");
- &mov ("esi",&DWP(0,$K256));
- &or ($A,"ecx"); # h=Maj(a,b,c)
-
- &add ($K256,4);
- &add ($A,$T); # h += T
- &mov ($T,&DWP(4*(8+15+16-1),"esp")) if ($in_16_63); # preload T
- &add ($E,"esi"); # d += K256[i]
- &add ($A,"esi"); # h += K256[i]
-}
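-# For reference, one invocation above is the standard SHA-256 round
-# (FIPS 180-4):
-#	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
-#	d += T1;  new a = T1 + Sigma0(a) + Maj(a,b,c)
-# with Sigma1(e) = (e>>>6)^(e>>>11)^(e>>>25) and
-# Sigma0(a) = (a>>>2)^(a>>>13)^(a>>>22); the boolean functions are computed
-# branchlessly via Ch(e,f,g) = ((f^g)&e)^g and Maj(a,b,c) = ((a|b)&c)|(a&b).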
-
-&function_begin("sha256_block_data_order");
- &mov ("esi",wparam(0)); # ctx
- &mov ("edi",wparam(1)); # inp
- &mov ("eax",wparam(2)); # num
- &mov ("ebx","esp"); # saved sp
-
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K256);
- &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));
-
- &sub ("esp",16);
- &and ("esp",-64);
-
- &shl ("eax",6);
- &add ("eax","edi");
- &mov (&DWP(0,"esp"),"esi"); # ctx
- &mov (&DWP(4,"esp"),"edi"); # inp
- &mov (&DWP(8,"esp"),"eax"); # inp+num*128
- &mov (&DWP(12,"esp"),"ebx"); # saved sp
-
-&set_label("loop",16);
- # copy input block to stack reversing byte and dword order
- for($i=0;$i<4;$i++) {
- &mov ("eax",&DWP($i*16+0,"edi"));
- &mov ("ebx",&DWP($i*16+4,"edi"));
- &mov ("ecx",&DWP($i*16+8,"edi"));
- &mov ("edx",&DWP($i*16+12,"edi"));
- &bswap ("eax");
- &bswap ("ebx");
- &bswap ("ecx");
- &bswap ("edx");
- &push ("eax");
- &push ("ebx");
- &push ("ecx");
- &push ("edx");
- }
- &add ("edi",64);
- &sub ("esp",4*8); # place for A,B,C,D,E,F,G,H
- &mov (&DWP(4*(8+16)+4,"esp"),"edi");
-
- # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
- &mov ($A,&DWP(0,"esi"));
- &mov ("ebx",&DWP(4,"esi"));
- &mov ("ecx",&DWP(8,"esi"));
- &mov ("edi",&DWP(12,"esi"));
- # &mov ($Aoff,$A);
- &mov ($Boff,"ebx");
- &mov ($Coff,"ecx");
- &mov ($Doff,"edi");
- &mov ($E,&DWP(16,"esi"));
- &mov ("ebx",&DWP(20,"esi"));
- &mov ("ecx",&DWP(24,"esi"));
- &mov ("edi",&DWP(28,"esi"));
- # &mov ($Eoff,$E);
- &mov ($Foff,"ebx");
- &mov ($Goff,"ecx");
- &mov ($Hoff,"edi");
-
-&set_label("00_15",16);
- &mov ($T,&DWP(4*(8+15),"esp"));
-
- &BODY_00_15();
-
- &cmp ("esi",0xc19bf174);
- &jne (&label("00_15"));
-
- &mov ($T,&DWP(4*(8+15+16-1),"esp")); # preloaded in BODY_00_15(1)
-&set_label("16_63",16);
- &mov ("esi",$T);
- &mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
- &ror ("esi",18-7);
- &mov ("edi","ecx");
- &xor ("esi",$T);
- &ror ("esi",7);
- &shr ($T,3);
-
- &ror ("edi",19-17);
- &xor ($T,"esi"); # T = sigma0(X[-15])
- &xor ("edi","ecx");
- &ror ("edi",17);
- &shr ("ecx",10);
- &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
- &xor ("edi","ecx"); # sigma1(X[-2])
-
- &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
- # &add ($T,"edi"); # T += sigma1(X[-2])
- # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
-
- &BODY_00_15(1);
-
- &cmp ("esi",0xc67178f2);
- &jne (&label("16_63"));
-
- &mov ("esi",&DWP(4*(8+16+64)+0,"esp"));#ctx
- # &mov ($A,$Aoff);
- &mov ("ebx",$Boff);
- &mov ("ecx",$Coff);
- &mov ("edi",$Doff);
- &add ($A,&DWP(0,"esi"));
- &add ("ebx",&DWP(4,"esi"));
- &add ("ecx",&DWP(8,"esi"));
- &add ("edi",&DWP(12,"esi"));
- &mov (&DWP(0,"esi"),$A);
- &mov (&DWP(4,"esi"),"ebx");
- &mov (&DWP(8,"esi"),"ecx");
- &mov (&DWP(12,"esi"),"edi");
- # &mov ($E,$Eoff);
- &mov ("eax",$Foff);
- &mov ("ebx",$Goff);
- &mov ("ecx",$Hoff);
- &mov ("edi",&DWP(4*(8+16+64)+4,"esp"));#inp
- &add ($E,&DWP(16,"esi"));
- &add ("eax",&DWP(20,"esi"));
- &add ("ebx",&DWP(24,"esi"));
- &add ("ecx",&DWP(28,"esi"));
- &mov (&DWP(16,"esi"),$E);
- &mov (&DWP(20,"esi"),"eax");
- &mov (&DWP(24,"esi"),"ebx");
- &mov (&DWP(28,"esi"),"ecx");
-
- &add ("esp",4*(8+16+64)); # destroy frame
- &sub ($K256,4*64); # rewind K
-
- &cmp ("edi",&DWP(8,"esp")); # are we done yet?
- &jb (&label("loop"));
-
- &mov ("esp",&DWP(12,"esp")); # restore sp
-&function_end_A();
-
-&set_label("K256",64); # Yes! I keep it in the code segment!
- &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
- &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
- &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
- &data_word(0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174);
- &data_word(0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc);
- &data_word(0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da);
- &data_word(0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7);
- &data_word(0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967);
- &data_word(0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13);
- &data_word(0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85);
- &data_word(0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3);
- &data_word(0xd192e819,0xd6990624,0xf40e3585,0x106aa070);
- &data_word(0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5);
- &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
- &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
- &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
-&asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.S b/app/openssl/crypto/sha/asm/sha256-armv4.S
deleted file mode 100644
index 853d7da5..00000000
--- a/app/openssl/crypto/sha/asm/sha256-armv4.S
+++ /dev/null
@@ -1,2690 +0,0 @@
-#include "arm_arch.h"
-
-.text
-.code 32
-
-.type K256,%object
-.align 5
-K256:
-.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-.size K256,.-K256
-.word 0 @ terminator
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
-.align 5
-
-.global sha256_block_data_order
-.type sha256_block_data_order,%function
-sha256_block_data_order:
- sub r3,pc,#8 @ sha256_block_data_order
- add r2,r1,r2,lsl#6 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#ARMV8_SHA256
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
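-@ Run-time dispatch: the OPENSSL_armcap_P feature word is probed once per
-@ call; execution branches to the ARMv8 Crypto Extensions path (.LARMv8)
-@ or the NEON path (.LNEON) when the corresponding bit is set, and falls
-@ through to the integer-only implementation below otherwise.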
- stmdb sp!,{r0,r1,r2,r4-r11,lr}
- ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
- sub r14,r3,#256+32 @ K256
- sub sp,sp,#16*4 @ alloca(X[16])
-.Loop:
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ magic
- eor r12,r12,r12
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 0
-# if 0==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 0
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 0==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#0*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 0==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 0<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#2*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#15*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 1
-# if 1==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 1
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 1==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#1*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 1==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 1<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#3*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#0*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 2
-# if 2==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 2
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 2==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#2*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 2==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 2<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#4*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#1*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 3
-# if 3==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 3
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 3==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#3*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 3==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 3<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#5*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#2*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 4
-# if 4==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 4
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 4==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#4*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 4==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 4<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#6*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#3*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 5
-# if 5==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 5==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#5*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 5==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 5<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#7*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#4*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 6
-# if 6==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 6
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 6==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#6*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 6==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 6<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#8*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#5*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 7
-# if 7==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 7==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#7*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 7==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 7<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#9*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#6*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 8
-# if 8==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 8
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 8==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#8*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 8==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 8<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#10*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#7*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 9
-# if 9==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 9
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 9==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#9*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 9==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 9<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#11*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#8*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 10
-# if 10==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 10
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 10==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#10*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 10==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 10<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#12*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#9*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 11
-# if 11==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 11
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 11==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#11*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 11==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 11<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#13*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#10*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 12
-# if 12==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 12
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 12==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#12*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 12==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 12<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#14*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#11*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 13
-# if 13==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 13
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 13==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#13*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 13==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 13<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#15*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#12*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 14
-# if 14==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 14
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 14==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#14*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 14==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 14<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#0*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#13*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 15
-# if 15==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- rev r2,r2
-#else
- @ ldrb r2,[r1,#3] @ 15
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 15==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#15*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 15==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 15<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#1*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#14*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
-.Lrounds_16_xx:
- @ ldr r2,[sp,#1*4] @ 16
- @ ldr r1,[sp,#14*4]
- mov r0,r2,ror#7
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#0*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#9*4]
-
- add r12,r12,r0
- eor r0,r8,r8,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#0*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 16==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 16<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#2*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#15*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#2*4] @ 17
- @ ldr r1,[sp,#15*4]
- mov r0,r2,ror#7
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#1*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#10*4]
-
- add r3,r3,r0
- eor r0,r7,r7,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#1*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 17==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 17<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#3*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#0*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#3*4] @ 18
- @ ldr r1,[sp,#0*4]
- mov r0,r2,ror#7
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#2*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#11*4]
-
- add r12,r12,r0
- eor r0,r6,r6,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#2*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 18==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 18<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#4*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#1*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#4*4] @ 19
- @ ldr r1,[sp,#1*4]
- mov r0,r2,ror#7
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#3*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#12*4]
-
- add r3,r3,r0
- eor r0,r5,r5,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#3*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 19==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 19<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#5*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#2*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#5*4] @ 20
- @ ldr r1,[sp,#2*4]
- mov r0,r2,ror#7
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#4*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#13*4]
-
- add r12,r12,r0
- eor r0,r4,r4,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#4*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 20==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 20<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#6*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#3*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#6*4] @ 21
- @ ldr r1,[sp,#3*4]
- mov r0,r2,ror#7
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#5*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#14*4]
-
- add r3,r3,r0
- eor r0,r11,r11,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#5*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 21==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 21<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#7*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#4*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#7*4] @ 22
- @ ldr r1,[sp,#4*4]
- mov r0,r2,ror#7
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#6*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#15*4]
-
- add r12,r12,r0
- eor r0,r10,r10,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#6*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 22==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 22<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#8*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#5*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#8*4] @ 23
- @ ldr r1,[sp,#5*4]
- mov r0,r2,ror#7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#7*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#0*4]
-
- add r3,r3,r0
- eor r0,r9,r9,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#7*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 23==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 23<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#9*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#6*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#9*4] @ 24
- @ ldr r1,[sp,#6*4]
- mov r0,r2,ror#7
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#8*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#1*4]
-
- add r12,r12,r0
- eor r0,r8,r8,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#8*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 24==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 24<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#10*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#7*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#10*4] @ 25
- @ ldr r1,[sp,#7*4]
- mov r0,r2,ror#7
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#9*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#2*4]
-
- add r3,r3,r0
- eor r0,r7,r7,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#9*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 25==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 25<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#11*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#8*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#11*4] @ 26
- @ ldr r1,[sp,#8*4]
- mov r0,r2,ror#7
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#10*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#3*4]
-
- add r12,r12,r0
- eor r0,r6,r6,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#10*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 26==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 26<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#12*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#9*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#12*4] @ 27
- @ ldr r1,[sp,#9*4]
- mov r0,r2,ror#7
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#11*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#4*4]
-
- add r3,r3,r0
- eor r0,r5,r5,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#11*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 27==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 27<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#13*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#10*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#13*4] @ 28
- @ ldr r1,[sp,#10*4]
- mov r0,r2,ror#7
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#12*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#5*4]
-
- add r12,r12,r0
- eor r0,r4,r4,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#12*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 28==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 28<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#14*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#11*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#14*4] @ 29
- @ ldr r1,[sp,#11*4]
- mov r0,r2,ror#7
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#13*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#6*4]
-
- add r3,r3,r0
- eor r0,r11,r11,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#13*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 29==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 29<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#15*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#12*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#15*4] @ 30
- @ ldr r1,[sp,#12*4]
- mov r0,r2,ror#7
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#14*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#7*4]
-
- add r12,r12,r0
- eor r0,r10,r10,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#14*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 30==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 30<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#0*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#13*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#0*4] @ 31
- @ ldr r1,[sp,#13*4]
- mov r0,r2,ror#7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#15*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#8*4]
-
- add r3,r3,r0
- eor r0,r9,r9,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#15*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 31==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 31<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#1*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#14*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
- ldreq r3,[sp,#16*4] @ pull ctx
- bne .Lrounds_16_xx
-
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldr r0,[r3,#0]
- ldr r2,[r3,#4]
- ldr r12,[r3,#8]
- add r4,r4,r0
- ldr r0,[r3,#12]
- add r5,r5,r2
- ldr r2,[r3,#16]
- add r6,r6,r12
- ldr r12,[r3,#20]
- add r7,r7,r0
- ldr r0,[r3,#24]
- add r8,r8,r2
- ldr r2,[r3,#28]
- add r9,r9,r12
- ldr r1,[sp,#17*4] @ pull inp
- ldr r12,[sp,#18*4] @ pull inp+len
- add r10,r10,r0
- add r11,r11,r2
- stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
- cmp r1,r12
- sub r14,r14,#256 @ rewind Ktbl
- bne .Loop
-
- add sp,sp,#19*4 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r11,pc}
-#else
- ldmia sp!,{r4-r11,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.type sha256_block_data_order_neon,%function
-.align 4
-sha256_block_data_order_neon:
-.LNEON:
- stmdb sp!,{r4-r12,lr}
-
- mov r12,sp
- sub sp,sp,#16*4+16 @ alloca
- sub r14,r3,#256+32 @ K256
- bic sp,sp,#15 @ align for 128-bit stores
-
- vld1.8 {q0},[r1]!
- vld1.8 {q1},[r1]!
- vld1.8 {q2},[r1]!
- vld1.8 {q3},[r1]!
- vld1.32 {q8},[r14,:128]!
- vld1.32 {q9},[r14,:128]!
- vld1.32 {q10},[r14,:128]!
- vld1.32 {q11},[r14,:128]!
- vrev32.8 q0,q0 @ yes, even on
- str r0,[sp,#64]
- vrev32.8 q1,q1 @ big-endian
- str r1,[sp,#68]
- mov r1,sp
- vrev32.8 q2,q2
- str r2,[sp,#72]
- vrev32.8 q3,q3
- str r12,[sp,#76] @ save original sp
- vadd.i32 q8,q8,q0
- vadd.i32 q9,q9,q1
- vst1.32 {q8},[r1,:128]!
- vadd.i32 q10,q10,q2
- vst1.32 {q9},[r1,:128]!
- vadd.i32 q11,q11,q3
- vst1.32 {q10},[r1,:128]!
- vst1.32 {q11},[r1,:128]!
-
- ldmia r0,{r4-r11}
- sub r1,r1,#64
- ldr r2,[sp,#0]
- eor r12,r12,r12
- eor r3,r5,r6
- b .L_00_48
-
-.align 4
-.L_00_48:
- vext.8 q8,q0,q1,#4
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- vext.8 q9,q2,q3,#4
- add r4,r4,r12
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vadd.i32 q0,q0,q9
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- vshr.u32 q9,q8,#3
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#4]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- veor q9,q9,q10
- add r10,r10,r2
- vsli.32 q11,q8,#14
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- vshr.u32 d24,d7,#17
- add r11,r11,r3
- and r2,r2,r7
- veor q9,q9,q11
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- vsli.32 d24,d7,#15
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- vshr.u32 d25,d7,#10
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- vadd.i32 q0,q0,q9
- add r10,r10,r2
- ldr r2,[sp,#8]
- veor d25,d25,d24
- and r12,r12,r3
- add r6,r6,r10
- vshr.u32 d24,d7,#19
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- vsli.32 d24,d7,#13
- add r9,r9,r2
- eor r2,r7,r8
- veor d25,d25,d24
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- vadd.i32 d0,d0,d25
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- vshr.u32 d24,d0,#17
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- vsli.32 d24,d0,#15
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- vshr.u32 d25,d0,#10
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- veor d25,d25,d24
- ldr r2,[sp,#12]
- and r3,r3,r12
- vshr.u32 d24,d0,#19
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- vld1.32 {q8},[r14,:128]!
- add r8,r8,r2
- vsli.32 d24,d0,#13
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- veor d25,d25,d24
- add r9,r9,r3
- and r2,r2,r5
- vadd.i32 d1,d1,d25
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- vadd.i32 q8,q8,q0
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#16]
- and r12,r12,r3
- add r4,r4,r8
- vst1.32 {q8},[r1,:128]!
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vext.8 q8,q1,q2,#4
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- vext.8 q9,q3,q0,#4
- add r8,r8,r12
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vadd.i32 q1,q1,q9
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- vshr.u32 q9,q8,#3
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#20]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- veor q9,q9,q10
- add r6,r6,r2
- vsli.32 q11,q8,#14
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- vshr.u32 d24,d1,#17
- add r7,r7,r3
- and r2,r2,r11
- veor q9,q9,q11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- vsli.32 d24,d1,#15
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- vshr.u32 d25,d1,#10
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- vadd.i32 q1,q1,q9
- add r6,r6,r2
- ldr r2,[sp,#24]
- veor d25,d25,d24
- and r12,r12,r3
- add r10,r10,r6
- vshr.u32 d24,d1,#19
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- vsli.32 d24,d1,#13
- add r5,r5,r2
- eor r2,r11,r4
- veor d25,d25,d24
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- vadd.i32 d2,d2,d25
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- vshr.u32 d24,d2,#17
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- vsli.32 d24,d2,#15
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- vshr.u32 d25,d2,#10
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- veor d25,d25,d24
- ldr r2,[sp,#28]
- and r3,r3,r12
- vshr.u32 d24,d2,#19
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- vld1.32 {q8},[r14,:128]!
- add r4,r4,r2
- vsli.32 d24,d2,#13
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- veor d25,d25,d24
- add r5,r5,r3
- and r2,r2,r9
- vadd.i32 d3,d3,d25
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- vadd.i32 q8,q8,q1
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#32]
- and r12,r12,r3
- add r8,r8,r4
- vst1.32 {q8},[r1,:128]!
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vext.8 q8,q2,q3,#4
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- vext.8 q9,q0,q1,#4
- add r4,r4,r12
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vadd.i32 q2,q2,q9
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- vshr.u32 q9,q8,#3
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#36]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- veor q9,q9,q10
- add r10,r10,r2
- vsli.32 q11,q8,#14
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- vshr.u32 d24,d3,#17
- add r11,r11,r3
- and r2,r2,r7
- veor q9,q9,q11
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- vsli.32 d24,d3,#15
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- vshr.u32 d25,d3,#10
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- vadd.i32 q2,q2,q9
- add r10,r10,r2
- ldr r2,[sp,#40]
- veor d25,d25,d24
- and r12,r12,r3
- add r6,r6,r10
- vshr.u32 d24,d3,#19
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- vsli.32 d24,d3,#13
- add r9,r9,r2
- eor r2,r7,r8
- veor d25,d25,d24
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- vadd.i32 d4,d4,d25
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- vshr.u32 d24,d4,#17
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- vsli.32 d24,d4,#15
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- vshr.u32 d25,d4,#10
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- veor d25,d25,d24
- ldr r2,[sp,#44]
- and r3,r3,r12
- vshr.u32 d24,d4,#19
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- vld1.32 {q8},[r14,:128]!
- add r8,r8,r2
- vsli.32 d24,d4,#13
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- veor d25,d25,d24
- add r9,r9,r3
- and r2,r2,r5
- vadd.i32 d5,d5,d25
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- vadd.i32 q8,q8,q2
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#48]
- and r12,r12,r3
- add r4,r4,r8
- vst1.32 {q8},[r1,:128]!
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vext.8 q8,q3,q0,#4
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- vext.8 q9,q1,q2,#4
- add r8,r8,r12
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vadd.i32 q3,q3,q9
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- vshr.u32 q9,q8,#3
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#52]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- veor q9,q9,q10
- add r6,r6,r2
- vsli.32 q11,q8,#14
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- vshr.u32 d24,d5,#17
- add r7,r7,r3
- and r2,r2,r11
- veor q9,q9,q11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- vsli.32 d24,d5,#15
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- vshr.u32 d25,d5,#10
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- vadd.i32 q3,q3,q9
- add r6,r6,r2
- ldr r2,[sp,#56]
- veor d25,d25,d24
- and r12,r12,r3
- add r10,r10,r6
- vshr.u32 d24,d5,#19
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- vsli.32 d24,d5,#13
- add r5,r5,r2
- eor r2,r11,r4
- veor d25,d25,d24
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- vadd.i32 d6,d6,d25
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- vshr.u32 d24,d6,#17
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- vsli.32 d24,d6,#15
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- vshr.u32 d25,d6,#10
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- veor d25,d25,d24
- ldr r2,[sp,#60]
- and r3,r3,r12
- vshr.u32 d24,d6,#19
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- vld1.32 {q8},[r14,:128]!
- add r4,r4,r2
- vsli.32 d24,d6,#13
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- veor d25,d25,d24
- add r5,r5,r3
- and r2,r2,r9
- vadd.i32 d7,d7,d25
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- vadd.i32 q8,q8,q3
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[r14]
- and r12,r12,r3
- add r8,r8,r4
- vst1.32 {q8},[r1,:128]!
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- teq r2,#0 @ check for K256 terminator
- ldr r2,[sp,#0]
- sub r1,r1,#64
- bne .L_00_48
-
- ldr r1,[sp,#68]
- ldr r0,[sp,#72]
- sub r14,r14,#256 @ rewind r14
- teq r1,r0
- subeq r1,r1,#64 @ avoid SEGV
- vld1.8 {q0},[r1]! @ load next input block
- vld1.8 {q1},[r1]!
- vld1.8 {q2},[r1]!
- vld1.8 {q3},[r1]!
- strne r1,[sp,#68]
- mov r1,sp
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- add r4,r4,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vrev32.8 q0,q0
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vadd.i32 q8,q8,q0
- ldr r2,[sp,#4]
- and r3,r3,r12
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- add r10,r10,r2
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- add r11,r11,r3
- and r2,r2,r7
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- add r10,r10,r2
- ldr r2,[sp,#8]
- and r12,r12,r3
- add r6,r6,r10
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- add r9,r9,r2
- eor r2,r7,r8
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- ldr r2,[sp,#12]
- and r3,r3,r12
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- add r8,r8,r2
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- add r9,r9,r3
- and r2,r2,r5
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#16]
- and r12,r12,r3
- add r4,r4,r8
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vst1.32 {q8},[r1,:128]!
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- add r8,r8,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vrev32.8 q1,q1
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vadd.i32 q8,q8,q1
- ldr r2,[sp,#20]
- and r3,r3,r12
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- add r6,r6,r2
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- add r7,r7,r3
- and r2,r2,r11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- add r6,r6,r2
- ldr r2,[sp,#24]
- and r12,r12,r3
- add r10,r10,r6
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- add r5,r5,r2
- eor r2,r11,r4
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- ldr r2,[sp,#28]
- and r3,r3,r12
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- add r4,r4,r2
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- add r5,r5,r3
- and r2,r2,r9
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#32]
- and r12,r12,r3
- add r8,r8,r4
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vst1.32 {q8},[r1,:128]!
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- add r4,r4,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vrev32.8 q2,q2
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vadd.i32 q8,q8,q2
- ldr r2,[sp,#36]
- and r3,r3,r12
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- add r10,r10,r2
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- add r11,r11,r3
- and r2,r2,r7
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- add r10,r10,r2
- ldr r2,[sp,#40]
- and r12,r12,r3
- add r6,r6,r10
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- add r9,r9,r2
- eor r2,r7,r8
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- ldr r2,[sp,#44]
- and r3,r3,r12
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- add r8,r8,r2
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- add r9,r9,r3
- and r2,r2,r5
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#48]
- and r12,r12,r3
- add r4,r4,r8
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vst1.32 {q8},[r1,:128]!
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- add r8,r8,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vrev32.8 q3,q3
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vadd.i32 q8,q8,q3
- ldr r2,[sp,#52]
- and r3,r3,r12
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- add r6,r6,r2
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- add r7,r7,r3
- and r2,r2,r11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- add r6,r6,r2
- ldr r2,[sp,#56]
- and r12,r12,r3
- add r10,r10,r6
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- add r5,r5,r2
- eor r2,r11,r4
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- ldr r2,[sp,#60]
- and r3,r3,r12
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- add r4,r4,r2
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- add r5,r5,r3
- and r2,r2,r9
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#64]
- and r12,r12,r3
- add r8,r8,r4
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vst1.32 {q8},[r1,:128]!
- ldr r0,[r2,#0]
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldr r12,[r2,#4]
- ldr r3,[r2,#8]
- ldr r1,[r2,#12]
- add r4,r4,r0 @ accumulate
- ldr r0,[r2,#16]
- add r5,r5,r12
- ldr r12,[r2,#20]
- add r6,r6,r3
- ldr r3,[r2,#24]
- add r7,r7,r1
- ldr r1,[r2,#28]
- add r8,r8,r0
- str r4,[r2],#4
- add r9,r9,r12
- str r5,[r2],#4
- add r10,r10,r3
- str r6,[r2],#4
- add r11,r11,r1
- str r7,[r2],#4
- stmia r2,{r8-r11}
-
- movne r1,sp
- ldrne r2,[sp,#0]
- eorne r12,r12,r12
- ldreq sp,[sp,#76] @ restore original sp
- eorne r3,r5,r6
- bne .L_00_48
-
- ldmia sp!,{r4-r12,pc}
-.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
-#endif
-#if __ARM_ARCH__>=7
-.type sha256_block_data_order_armv8,%function
-.align 5
-sha256_block_data_order_armv8:
-.LARMv8:
- vld1.32 {q0,q1},[r0]
- sub r3,r3,#sha256_block_data_order-K256
-
-.Loop_v8:
- vld1.8 {q8-q9},[r1]!
- vld1.8 {q10-q11},[r1]!
- vld1.32 {q12},[r3]!
- vrev32.8 q8,q8
- vrev32.8 q9,q9
- vrev32.8 q10,q10
- vrev32.8 q11,q11
- vmov q14,q0 @ offload
- vmov q15,q1
- teq r1,r2
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
- .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
- .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
-
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
-
- vld1.32 {q13},[r3]
- vadd.i32 q12,q12,q10
- sub r3,r3,#256-16 @ rewind
- vmov q2,q0
- .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
- .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
-
- vadd.i32 q13,q13,q11
- vmov q2,q0
- .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
- .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
-
- vadd.i32 q0,q0,q14
- vadd.i32 q1,q1,q15
- bne .Loop_v8
-
- vst1.32 {q0,q1},[r0]
-
- bx lr @ bx lr
-.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
-#endif
-.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
diff --git a/app/openssl/crypto/sha/asm/sha256-armv4.pl b/app/openssl/crypto/sha/asm/sha256-armv4.pl
deleted file mode 100644
index 505ca8f3..00000000
--- a/app/openssl/crypto/sha/asm/sha256-armv4.pl
+++ /dev/null
@@ -1,656 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA256 block procedure for ARMv4. May 2007.
-
-# Performance is ~2x better than gcc 3.4 generated code and in
-# "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
-# per byte [on single-issue Xscale PXA250 core].
-
-# July 2010.
-#
-# Rescheduling for dual-issue pipeline resulted in 22% improvement on
-# Cortex A8 core and ~20 cycles per processed byte.
-
-# February 2011.
-#
-# Profiler-assisted and platform-specific optimization resulted in 16%
-# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
-
-# September 2013.
-#
-# Add NEON implementation. On Cortex A8 it was measured to process one
-# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
-# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
-# code (meaning that the latter performs sub-optimally; nothing was done
-# about it).
-
-# May 2014.
-#
-# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-$ctx="r0"; $t0="r0";
-$inp="r1"; $t4="r1";
-$len="r2"; $t1="r2";
-$T1="r3"; $t3="r3";
-$A="r4";
-$B="r5";
-$C="r6";
-$D="r7";
-$E="r8";
-$F="r9";
-$G="r10";
-$H="r11";
-@V=($A,$B,$C,$D,$E,$F,$G,$H);
-$t2="r12";
-$Ktbl="r14";
-
-@Sigma0=( 2,13,22);
-@Sigma1=( 6,11,25);
-@sigma0=( 7,18, 3);
-@sigma1=(17,19,10);
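The four arrays above hold the rotation/shift amounts of the SHA-256 Sigma0/Sigma1 and sigma0/sigma1 functions. BODY_00_15 below exploits the fact that rotation distributes over XOR: it XORs copies of e rotated by the differences of the Sigma1 amounts and applies the remaining base rotation only at the final add (ror#$Sigma1[0]), saving a rotate per Sigma. A minimal standalone check of that identity (illustrative only, not part of the generator):

    sub ror32 { my ($x,$n)=@_; (($x>>$n)|($x<<(32-$n)))&0xffffffff }
    my ($x,@S) = (0xdeadbeef, 6,11,25);               # Sigma1 amounts
    my $direct = ror32($x,$S[0]) ^ ror32($x,$S[1]) ^ ror32($x,$S[2]);
    my $folded = ror32($x ^ ror32($x,$S[1]-$S[0])
                          ^ ror32($x,$S[2]-$S[0]), $S[0]);
    print $direct == $folded ? "ok\n" : "broken\n";
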
-
-sub BODY_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___ if ($i<16);
-#if __ARM_ARCH__>=7
- @ ldr $t1,[$inp],#4 @ $i
-# if $i==15
- str $inp,[sp,#17*4] @ make room for $t4
-# endif
- eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
- add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
- eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
- rev $t1,$t1
-#else
- @ ldrb $t1,[$inp,#3] @ $i
- add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
- ldrb $t2,[$inp,#2]
- ldrb $t0,[$inp,#1]
- orr $t1,$t1,$t2,lsl#8
- ldrb $t2,[$inp],#4
- orr $t1,$t1,$t0,lsl#16
-# if $i==15
- str $inp,[sp,#17*4] @ make room for $t4
-# endif
- eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
- orr $t1,$t1,$t2,lsl#24
- eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
-#endif
-___
-$code.=<<___;
- ldr $t2,[$Ktbl],#4 @ *K256++
- add $h,$h,$t1 @ h+=X[i]
- str $t1,[sp,#`$i%16`*4]
- eor $t1,$f,$g
- add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
- and $t1,$t1,$e
- add $h,$h,$t2 @ h+=K256[i]
- eor $t1,$t1,$g @ Ch(e,f,g)
- eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
- add $h,$h,$t1 @ h+=Ch(e,f,g)
-#if $i==31
- and $t2,$t2,#0xff
- cmp $t2,#0xf2 @ done?
-#endif
-#if $i<15
-# if __ARM_ARCH__>=7
- ldr $t1,[$inp],#4 @ prefetch
-# else
- ldrb $t1,[$inp,#3]
-# endif
- eor $t2,$a,$b @ a^b, b^c in next round
-#else
- ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
- eor $t2,$a,$b @ a^b, b^c in next round
- ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
-#endif
- eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
- and $t3,$t3,$t2 @ (b^c)&=(a^b)
- add $d,$d,$h @ d+=h
- eor $t3,$t3,$b @ Maj(a,b,c)
- add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
- @ add $h,$h,$t3 @ h+=Maj(a,b,c)
-___
- ($t2,$t3)=($t3,$t2);
-}
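The register swap on the last line is what the recurring "h+=Maj(a,b,c) from the past" comments refer to: each round computes (b^c)&(a^b) and XORs in b to obtain Maj, but the current round's a^b is exactly the next round's b^c, so the two temporaries alternate roles and the add of Maj into h is deferred into the following round to hide latency. The identity being relied on, as a quick illustrative check:

    my ($a,$b,$c) = (0x6a09e667, 0xbb67ae85, 0x3c6ef372);   # arbitrary
    my $maj   = ($a & $b) ^ ($a & $c) ^ ($b & $c);
    my $trick = (($b ^ $c) & ($a ^ $b)) ^ $b;
    print $maj == $trick ? "ok\n" : "broken\n";
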
-
-sub BODY_16_XX {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___;
- @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
- @ ldr $t4,[sp,#`($i+14)%16`*4]
- mov $t0,$t1,ror#$sigma0[0]
- add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
- mov $t2,$t4,ror#$sigma1[0]
- eor $t0,$t0,$t1,ror#$sigma0[1]
- eor $t2,$t2,$t4,ror#$sigma1[1]
- eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
- ldr $t1,[sp,#`($i+0)%16`*4]
- eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
- ldr $t4,[sp,#`($i+9)%16`*4]
-
- add $t2,$t2,$t0
- eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
- add $t1,$t1,$t2
- eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
- add $t1,$t1,$t4 @ X[i]
-___
- &BODY_00_15(@_);
-}
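BODY_16_XX implements the standard SHA-256 message-schedule recurrence on the 16-word circular buffer kept on the stack; once i>=16 the offsets ($i+1)%16, ($i+14)%16, ($i+0)%16 and ($i+9)%16 address X[i-15], X[i-2], X[i-16] and X[i-7] respectively. A reference sketch of the recurrence (illustrative, with stand-in data):

    sub ror32  { my ($x,$n)=@_; (($x>>$n)|($x<<(32-$n)))&0xffffffff }
    sub sigma0 { my $x=shift; ror32($x,7)  ^ ror32($x,18) ^ ($x>>3)  }
    sub sigma1 { my $x=shift; ror32($x,17) ^ ror32($x,19) ^ ($x>>10) }
    my @X = map { $_ * 0x01010101 } (1..16);    # stand-in block words
    for (my $i=16; $i<64; $i++) {
        # X[i%16] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2])
        $X[$i%16] = ( $X[$i%16] + sigma0($X[($i+1)%16])
                    + $X[($i+9)%16] + sigma1($X[($i+14)%16]) ) & 0xffffffff;
    }
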
-
-$code=<<___;
-#include "arm_arch.h"
-
-.text
-.code 32
-
-.type K256,%object
-.align 5
-K256:
-.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-.size K256,.-K256
-.word 0 @ terminator
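Two different loop-termination tricks hang off this table. The integer-only path checks, at the end of each unrolled 16-round pass, whether the low byte of the last K256 word it consumed is 0xf2 (the "and ...,#0xff; cmp ...,#0xf2 @ done?" emitted at round 31): successive passes end on K256[31], K256[47] and K256[63], whose low bytes are 0x67, 0xf3 and 0xf2, so only the final pass matches. The NEON path instead reads one word past the table and compares it against this zero terminator (the "teq ...,#0 @ check for K256 terminator" near .L_00_48).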
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
-.align 5
-
-.global sha256_block_data_order
-.type sha256_block_data_order,%function
-sha256_block_data_order:
- sub r3,pc,#8 @ sha256_block_data_order
- add $len,$inp,$len,lsl#6 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#ARMV8_SHA256
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
- stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
- ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
- sub $Ktbl,r3,#256+32 @ K256
- sub sp,sp,#16*4 @ alloca(X[16])
-.Loop:
-# if __ARM_ARCH__>=7
- ldr $t1,[$inp],#4
-# else
- ldrb $t1,[$inp,#3]
-# endif
- eor $t3,$B,$C @ magic
- eor $t2,$t2,$t2
-___
-for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
-$code.=".Lrounds_16_xx:\n";
-for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
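The unshift(@V,pop(@V)) after each round is the whole register-allocation strategy: no value is ever moved between registers; the names a..h simply rotate so that the register holding a dead h is reused for the next round's a. Illustrative only:

    my @V = qw(r4 r5 r6 r7 r8 r9 r10 r11);    # (a b c d e f g h)
    unshift(@V, pop(@V));
    # @V is now (r11 r4 r5 r6 r7 r8 r9 r10): the register that held
    # h becomes a for the next round; nothing is copied at run time.
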
-$code.=<<___;
- ldreq $t3,[sp,#16*4] @ pull ctx
- bne .Lrounds_16_xx
-
- add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
- ldr $t0,[$t3,#0]
- ldr $t1,[$t3,#4]
- ldr $t2,[$t3,#8]
- add $A,$A,$t0
- ldr $t0,[$t3,#12]
- add $B,$B,$t1
- ldr $t1,[$t3,#16]
- add $C,$C,$t2
- ldr $t2,[$t3,#20]
- add $D,$D,$t0
- ldr $t0,[$t3,#24]
- add $E,$E,$t1
- ldr $t1,[$t3,#28]
- add $F,$F,$t2
- ldr $inp,[sp,#17*4] @ pull inp
- ldr $t2,[sp,#18*4] @ pull inp+len
- add $G,$G,$t0
- add $H,$H,$t1
- stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
- cmp $inp,$t2
- sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
- bne .Loop
-
- add sp,sp,#`16+3`*4 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r11,pc}
-#else
- ldmia sp!,{r4-r11,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- bx lr @ interoperable with Thumb ISA:-)
-#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-___
-######################################################################
-# NEON stuff
-#
-{{{
-my @X=map("q$_",(0..3));
-my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
-my $Xfer=$t4;
-my $j=0;
-
-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
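Dlo/Dhi map a NEON quadword register name to its low and high doubleword halves (q8 -> d16/d17), which is why the NEON code mixes q- and d-register forms: d24/d25, used for the sigma1 steps, are the halves of q12. The & call form below bypasses the empty prototypes, just as the generator's own calls do (illustrative):

    print &Dlo("q8"), " ", &Dhi("q8"), "\n";   # prints: d16 d17
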
-
-sub AUTOLOAD() # thunk [simplified] x86-style perlasm
-{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
- my $arg = pop;
- $arg = "#$arg" if ($arg*1 eq $arg);
- $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
-}
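AUTOLOAD is what lets the NEON helpers below be written as plain Perl calls: any call to an undefined sub becomes one line of assembly appended to $code, with the first underscore of the name turned into a dot and a bare numeric last argument gaining a '#'. Two illustrative calls of the same form the helpers use, and what they append:

    &vshr_u32("q10","q8",7);           # appends "\tvshr.u32\tq10,q8,#7\n"
    &vld1_32("{q8}","[r14,:128]!");    # appends "\tvld1.32\t{q8},[r14,:128]!\n"
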
-
-sub Xupdate()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e,$f,$g,$h);
-
- &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T2,$T0,$sigma0[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T1,$T0,$sigma0[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T2,$T0,32-$sigma0[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T3,$T0,$sigma0[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T1,$T1,$T2);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T3,$T0,32-$sigma0[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T1,$T1,$T3); # sigma0(X[1..4])
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T5,$T5,$T4);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T5,$T5,$T4); # sigma1(X[14..15])
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T5,$T5,$T4);
- eval(shift(@insns));
- eval(shift(@insns));
- &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &vld1_32 ("{$T0}","[$Ktbl,:128]!");
- eval(shift(@insns));
- eval(shift(@insns));
- &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
- eval(shift(@insns));
- eval(shift(@insns));
- &veor ($T5,$T5,$T4); # sigma1(X[16..17])
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 ($T0,$T0,@X[0]);
- while($#insns>=2) { eval(shift(@insns)); }
- &vst1_32 ("{$T0}","[$Xfer,:128]!");
- eval(shift(@insns));
- eval(shift(@insns));
-
- push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub Xpreload()
-{ use integer;
- my $body = shift;
- my @insns = (&$body,&$body,&$body,&$body);
- my ($a,$b,$c,$d,$e,$f,$g,$h);
-
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vld1_32 ("{$T0}","[$Ktbl,:128]!");
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vrev32_8 (@X[0],@X[0]);
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- eval(shift(@insns));
- &vadd_i32 ($T0,$T0,@X[0]);
- foreach (@insns) { eval; } # remaining instructions
- &vst1_32 ("{$T0}","[$Xfer,:128]!");
-
- push(@X,shift(@X)); # "rotate" X[]
-}
-
-sub body_00_15 () {
- (
- '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
- '&add ($h,$h,$t1)', # h+=X[i]+K[i]
- '&eor ($t1,$f,$g)',
- '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
- '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
- '&and ($t1,$t1,$e)',
- '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
- '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
- '&eor ($t1,$t1,$g)', # Ch(e,f,g)
- '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
- '&eor ($t2,$a,$b)', # a^b, b^c in next round
- '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
- '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
- '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
- '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
- '&ldr ($t1,"[sp,#64]") if ($j==31)',
- '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
- '&add ($d,$d,$h)', # d+=h
- '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
- '&eor ($t3,$t3,$b)', # Maj(a,b,c)
- '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
- )
-}
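Note that body_00_15 returns one scalar round as a list of Perl snippets rather than emitting text directly; Xupdate above gathers four rounds' worth of them (my @insns = (&$body,&$body,&$body,&$body)) and evals them one or two at a time between the NEON instructions, so each 4-word schedule update is dissolved into four scalar rounds and both pipelines stay busy.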
-
-$code.=<<___;
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.type sha256_block_data_order_neon,%function
-.align 4
-sha256_block_data_order_neon:
-.LNEON:
- stmdb sp!,{r4-r12,lr}
-
- mov $t2,sp
- sub sp,sp,#16*4+16 @ alloca
- sub $Ktbl,r3,#256+32 @ K256
- bic sp,sp,#15 @ align for 128-bit stores
-
- vld1.8 {@X[0]},[$inp]!
- vld1.8 {@X[1]},[$inp]!
- vld1.8 {@X[2]},[$inp]!
- vld1.8 {@X[3]},[$inp]!
- vld1.32 {$T0},[$Ktbl,:128]!
- vld1.32 {$T1},[$Ktbl,:128]!
- vld1.32 {$T2},[$Ktbl,:128]!
- vld1.32 {$T3},[$Ktbl,:128]!
- vrev32.8 @X[0],@X[0] @ yes, even on
- str $ctx,[sp,#64]
- vrev32.8 @X[1],@X[1] @ big-endian
- str $inp,[sp,#68]
- mov $Xfer,sp
- vrev32.8 @X[2],@X[2]
- str $len,[sp,#72]
- vrev32.8 @X[3],@X[3]
- str $t2,[sp,#76] @ save original sp
- vadd.i32 $T0,$T0,@X[0]
- vadd.i32 $T1,$T1,@X[1]
- vst1.32 {$T0},[$Xfer,:128]!
- vadd.i32 $T2,$T2,@X[2]
- vst1.32 {$T1},[$Xfer,:128]!
- vadd.i32 $T3,$T3,@X[3]
- vst1.32 {$T2},[$Xfer,:128]!
- vst1.32 {$T3},[$Xfer,:128]!
-
- ldmia $ctx,{$A-$H}
- sub $Xfer,$Xfer,#64
- ldr $t1,[sp,#0]
- eor $t2,$t2,$t2
- eor $t3,$B,$C
- b .L_00_48
-
-.align 4
-.L_00_48:
-___
- &Xupdate(\&body_00_15);
- &Xupdate(\&body_00_15);
- &Xupdate(\&body_00_15);
- &Xupdate(\&body_00_15);
-$code.=<<___;
- teq $t1,#0 @ check for K256 terminator
- ldr $t1,[sp,#0]
- sub $Xfer,$Xfer,#64
- bne .L_00_48
-
- ldr $inp,[sp,#68]
- ldr $t0,[sp,#72]
- sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
- teq $inp,$t0
- subeq $inp,$inp,#64 @ avoid SEGV
- vld1.8 {@X[0]},[$inp]! @ load next input block
- vld1.8 {@X[1]},[$inp]!
- vld1.8 {@X[2]},[$inp]!
- vld1.8 {@X[3]},[$inp]!
- strne $inp,[sp,#68]
- mov $Xfer,sp
-___
- &Xpreload(\&body_00_15);
- &Xpreload(\&body_00_15);
- &Xpreload(\&body_00_15);
- &Xpreload(\&body_00_15);
-$code.=<<___;
- ldr $t0,[$t1,#0]
- add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
- ldr $t2,[$t1,#4]
- ldr $t3,[$t1,#8]
- ldr $t4,[$t1,#12]
- add $A,$A,$t0 @ accumulate
- ldr $t0,[$t1,#16]
- add $B,$B,$t2
- ldr $t2,[$t1,#20]
- add $C,$C,$t3
- ldr $t3,[$t1,#24]
- add $D,$D,$t4
- ldr $t4,[$t1,#28]
- add $E,$E,$t0
- str $A,[$t1],#4
- add $F,$F,$t2
- str $B,[$t1],#4
- add $G,$G,$t3
- str $C,[$t1],#4
- add $H,$H,$t4
- str $D,[$t1],#4
- stmia $t1,{$E-$H}
-
- movne $Xfer,sp
- ldrne $t1,[sp,#0]
- eorne $t2,$t2,$t2
- ldreq sp,[sp,#76] @ restore original sp
- eorne $t3,$B,$C
- bne .L_00_48
-
- ldmia sp!,{r4-r12,pc}
-.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
-#endif
-___
-}}}
-######################################################################
-# ARMv8 stuff
-#
-{{{
-my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
-my @MSG=map("q$_",(8..11));
-my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
-my $Ktbl="r3";
-
-$code.=<<___;
-#if __ARM_ARCH__>=7
-.type sha256_block_data_order_armv8,%function
-.align 5
-sha256_block_data_order_armv8:
-.LARMv8:
- vld1.32 {$ABCD,$EFGH},[$ctx]
- sub $Ktbl,r3,#sha256_block_data_order-K256
-
-.Loop_v8:
- vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
- vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
- vld1.32 {$W0},[$Ktbl]!
- vrev32.8 @MSG[0],@MSG[0]
- vrev32.8 @MSG[1],@MSG[1]
- vrev32.8 @MSG[2],@MSG[2]
- vrev32.8 @MSG[3],@MSG[3]
- vmov $ABCD_SAVE,$ABCD @ offload
- vmov $EFGH_SAVE,$EFGH
- teq $inp,$len
-___
-for($i=0;$i<12;$i++) {
-$code.=<<___;
- vld1.32 {$W1},[$Ktbl]!
- vadd.i32 $W0,$W0,@MSG[0]
- sha256su0 @MSG[0],@MSG[1]
- vmov $abcd,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
- sha256su1 @MSG[0],@MSG[2],@MSG[3]
-___
- ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
-}
-$code.=<<___;
- vld1.32 {$W1},[$Ktbl]!
- vadd.i32 $W0,$W0,@MSG[0]
- vmov $abcd,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
-
- vld1.32 {$W0},[$Ktbl]!
- vadd.i32 $W1,$W1,@MSG[1]
- vmov $abcd,$ABCD
- sha256h $ABCD,$EFGH,$W1
- sha256h2 $EFGH,$abcd,$W1
-
- vld1.32 {$W1},[$Ktbl]
- vadd.i32 $W0,$W0,@MSG[2]
- sub $Ktbl,$Ktbl,#256-16 @ rewind
- vmov $abcd,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
-
- vadd.i32 $W1,$W1,@MSG[3]
- vmov $abcd,$ABCD
- sha256h $ABCD,$EFGH,$W1
- sha256h2 $EFGH,$abcd,$W1
-
- vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
- vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
- bne .Loop_v8
-
- vst1.32 {$ABCD,$EFGH},[$ctx]
-
- ret @ bx lr
-.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
-#endif
-___
-}}}
-$code.=<<___;
-.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
-___
-
-{ my %opcode = (
- "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
- "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
-
- sub unsha256 {
- my ($mnemonic,$arg)=@_;
-
- if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
- my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
- |(($2&7)<<17)|(($2&8)<<4)
- |(($3&7)<<1) |(($3&8)<<2);
-	    # Bytes are emitted in little-endian order below, since ARMv7
-	    # instructions are always encoded little-endian. The correct
-	    # solution is to use the .inst directive, but older
-	    # assemblers don't implement it:-(
- sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
- $word&0xff,($word>>8)&0xff,
- ($word>>16)&0xff,($word>>24)&0xff,
- $mnemonic,$arg;
- }
- }
-}
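Tracing unsha256 for one concrete case shows where the .byte quadruples in the generated .S file come from. For "sha256h q0,q1,q12" the base opcode 0xf3000c40 ORed with the register fields gives 0xf3020c68, emitted as little-endian bytes; this matches the generated ".byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12" lines above. A worked example (illustrative):

    my $word = 0xf3000c40                  # sha256h base opcode
             | ((0&7)<<13) | ((0&8)<<19)   # Qd = q0
             | ((1&7)<<17) | ((1&8)<<4)    # Qn = q1
             | ((12&7)<<1) | ((12&8)<<2);  # Qm = q12
    printf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\n",
        $word&0xff, ($word>>8)&0xff, ($word>>16)&0xff, ($word>>24)&0xff;
    # prints: .byte   0x68,0x0c,0x02,0xf3
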
-
-foreach (split($/,$code)) {
-
- s/\`([^\`]*)\`/eval $1/geo;
-
- s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
-
- s/\bret\b/bx lr/go or
- s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
-
- print $_,"\n";
-}
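Note the `or` chain in the substitution pair above: a line containing "ret" is rewritten to "bx lr" and then left alone (hence the literal "bx lr @ bx lr" visible in the generated .S file earlier in this diff), while a pre-existing "bx lr" falls through to the second substitution and is replaced by its raw encoding .word 0xe12fff1e, which still assembles with -march=armv4.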
-
-close STDOUT; # enforce flush
diff --git a/app/openssl/crypto/sha/asm/sha256-armv8.S b/app/openssl/crypto/sha/asm/sha256-armv8.S
deleted file mode 100644
index bd43b1fe..00000000
--- a/app/openssl/crypto/sha/asm/sha256-armv8.S
+++ /dev/null
@@ -1,1141 +0,0 @@
-#include "arm_arch.h"
-
-.text
-
-.globl sha256_block_data_order
-.type sha256_block_data_order,%function
-.align 6
-sha256_block_data_order:
- ldr x16,.LOPENSSL_armcap_P
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA256
- b.ne .Lv8_entry
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*4
-
- ldp w20,w21,[x0] // load context
- ldp w22,w23,[x0,#2*4]
- ldp w24,w25,[x0,#4*4]
- add x2,x1,x2,lsl#6 // end of input
- ldp w26,w27,[x0,#6*4]
- adr x30,K256
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp w3,w4,[x1],#2*4
- ldr w19,[x30],#4 // *K++
- eor w28,w21,w22 // magic seed
- str x1,[x29,#112]
-#ifndef __ARMEB__
- rev w3,w3 // 0
-#endif
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w6,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w3 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w4,w4 // 1
-#endif
- ldp w5,w6,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w7,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w4 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w5,w5 // 2
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w8,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w5 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w6,w6 // 3
-#endif
- ldp w7,w8,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w9,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w6 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w7,w7 // 4
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w10,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w7 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w10,ror#11 // Sigma1(e)
- ror w10,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w10,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w8,w8 // 5
-#endif
- ldp w9,w10,[x1],#2*4
- add w23,w23,w17 // h+=Sigma0(a)
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w11,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w8 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w11,ror#11 // Sigma1(e)
- ror w11,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w11,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w9,w9 // 6
-#endif
- add w22,w22,w17 // h+=Sigma0(a)
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w12,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w9 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w12,ror#11 // Sigma1(e)
- ror w12,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w12,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w10,w10 // 7
-#endif
- ldp w11,w12,[x1],#2*4
- add w21,w21,w17 // h+=Sigma0(a)
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- eor w13,w25,w25,ror#14
- and w17,w26,w25
- bic w28,w27,w25
- add w20,w20,w10 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w13,ror#11 // Sigma1(e)
- ror w13,w21,#2
- add w20,w20,w17 // h+=Ch(e,f,g)
- eor w17,w21,w21,ror#9
- add w20,w20,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w24,w24,w20 // d+=h
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w13,w17,ror#13 // Sigma0(a)
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w20,w20,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w11,w11 // 8
-#endif
- add w20,w20,w17 // h+=Sigma0(a)
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w14,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w11 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w14,ror#11 // Sigma1(e)
- ror w14,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w14,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w12,w12 // 9
-#endif
- ldp w13,w14,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w15,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w12 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w15,ror#11 // Sigma1(e)
- ror w15,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w15,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w13,w13 // 10
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w0,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w13 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w0,ror#11 // Sigma1(e)
- ror w0,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w0,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w14,w14 // 11
-#endif
- ldp w15,w0,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w6,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w14 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w15,w15 // 12
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w7,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w15 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w0,w0 // 13
-#endif
- ldp w1,w2,[x1]
- add w23,w23,w17 // h+=Sigma0(a)
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w8,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w0 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w1,w1 // 14
-#endif
- ldr w6,[sp,#12]
- add w22,w22,w17 // h+=Sigma0(a)
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w9,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w1 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev w2,w2 // 15
-#endif
- ldr w7,[sp,#0]
- add w21,w21,w17 // h+=Sigma0(a)
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
-.Loop_16_xx:
- ldr w8,[sp,#4]
- str w11,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w10,w5,#7
- and w17,w25,w24
- ror w9,w2,#17
- bic w19,w26,w24
- ror w11,w20,#2
- add w27,w27,w3 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w10,w10,w5,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w11,w11,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w9,w9,w2,ror#19
- eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w11,w20,ror#22 // Sigma0(a)
- eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
- add w4,w4,w13
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w4,w4,w10
- add w27,w27,w17 // h+=Sigma0(a)
- add w4,w4,w9
- ldr w9,[sp,#8]
- str w12,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w11,w6,#7
- and w17,w24,w23
- ror w10,w3,#17
- bic w28,w25,w23
- ror w12,w27,#2
- add w26,w26,w4 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w11,w11,w6,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w12,w12,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w10,w10,w3,ror#19
- eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w12,w27,ror#22 // Sigma0(a)
- eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
- add w5,w5,w14
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w5,w5,w11
- add w26,w26,w17 // h+=Sigma0(a)
- add w5,w5,w10
- ldr w10,[sp,#12]
- str w13,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w12,w7,#7
- and w17,w23,w22
- ror w11,w4,#17
- bic w19,w24,w22
- ror w13,w26,#2
- add w25,w25,w5 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w12,w12,w7,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w13,w13,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w11,w11,w4,ror#19
- eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w13,w26,ror#22 // Sigma0(a)
- eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
- add w6,w6,w15
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w6,w6,w12
- add w25,w25,w17 // h+=Sigma0(a)
- add w6,w6,w11
- ldr w11,[sp,#0]
- str w14,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w13,w8,#7
- and w17,w22,w21
- ror w12,w5,#17
- bic w28,w23,w21
- ror w14,w25,#2
- add w24,w24,w6 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w13,w13,w8,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w14,w14,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w12,w12,w5,ror#19
- eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w14,w25,ror#22 // Sigma0(a)
- eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
- add w7,w7,w0
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w7,w7,w13
- add w24,w24,w17 // h+=Sigma0(a)
- add w7,w7,w12
- ldr w12,[sp,#4]
- str w15,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w14,w9,#7
- and w17,w21,w20
- ror w13,w6,#17
- bic w19,w22,w20
- ror w15,w24,#2
- add w23,w23,w7 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w14,w14,w9,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w15,w15,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w13,w13,w6,ror#19
- eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w15,w24,ror#22 // Sigma0(a)
- eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
- add w8,w8,w1
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w8,w8,w14
- add w23,w23,w17 // h+=Sigma0(a)
- add w8,w8,w13
- ldr w13,[sp,#8]
- str w0,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w15,w10,#7
- and w17,w20,w27
- ror w14,w7,#17
- bic w28,w21,w27
- ror w0,w23,#2
- add w22,w22,w8 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w15,w15,w10,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w0,w0,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w14,w14,w7,ror#19
- eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w0,w23,ror#22 // Sigma0(a)
- eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
- add w9,w9,w2
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w9,w9,w15
- add w22,w22,w17 // h+=Sigma0(a)
- add w9,w9,w14
- ldr w14,[sp,#12]
- str w1,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w0,w11,#7
- and w17,w27,w26
- ror w15,w8,#17
- bic w19,w20,w26
- ror w1,w22,#2
- add w21,w21,w9 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w0,w0,w11,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w1,w1,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w15,w15,w8,ror#19
- eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w1,w22,ror#22 // Sigma0(a)
- eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
- add w10,w10,w3
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w10,w10,w0
- add w21,w21,w17 // h+=Sigma0(a)
- add w10,w10,w15
- ldr w15,[sp,#0]
- str w2,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w1,w12,#7
- and w17,w26,w25
- ror w0,w9,#17
- bic w28,w27,w25
- ror w2,w21,#2
- add w20,w20,w10 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w1,w1,w12,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w2,w2,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w0,w0,w9,ror#19
- eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w2,w21,ror#22 // Sigma0(a)
- eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
- add w11,w11,w4
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w11,w11,w1
- add w20,w20,w17 // h+=Sigma0(a)
- add w11,w11,w0
- ldr w0,[sp,#4]
- str w3,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w2,w13,#7
- and w17,w25,w24
- ror w1,w10,#17
- bic w19,w26,w24
- ror w3,w20,#2
- add w27,w27,w11 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w2,w2,w13,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w3,w3,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w1,w1,w10,ror#19
- eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w3,w20,ror#22 // Sigma0(a)
- eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
- add w12,w12,w5
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w12,w12,w2
- add w27,w27,w17 // h+=Sigma0(a)
- add w12,w12,w1
- ldr w1,[sp,#8]
- str w4,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w3,w14,#7
- and w17,w24,w23
- ror w2,w11,#17
- bic w28,w25,w23
- ror w4,w27,#2
- add w26,w26,w12 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w3,w3,w14,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w4,w4,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w2,w2,w11,ror#19
- eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w4,w27,ror#22 // Sigma0(a)
- eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
- add w13,w13,w6
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w13,w13,w3
- add w26,w26,w17 // h+=Sigma0(a)
- add w13,w13,w2
- ldr w2,[sp,#12]
- str w5,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w4,w15,#7
- and w17,w23,w22
- ror w3,w12,#17
- bic w19,w24,w22
- ror w5,w26,#2
- add w25,w25,w13 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w4,w4,w15,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w5,w5,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w3,w3,w12,ror#19
- eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w5,w26,ror#22 // Sigma0(a)
- eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
- add w14,w14,w7
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w14,w14,w4
- add w25,w25,w17 // h+=Sigma0(a)
- add w14,w14,w3
- ldr w3,[sp,#0]
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w5,w0,#7
- and w17,w22,w21
- ror w4,w13,#17
- bic w28,w23,w21
- ror w6,w25,#2
- add w24,w24,w14 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w5,w5,w0,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w6,w6,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w4,w4,w13,ror#19
- eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w25,ror#22 // Sigma0(a)
- eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
- add w15,w15,w8
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w15,w15,w5
- add w24,w24,w17 // h+=Sigma0(a)
- add w15,w15,w4
- ldr w4,[sp,#4]
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w6,w1,#7
- and w17,w21,w20
- ror w5,w14,#17
- bic w19,w22,w20
- ror w7,w24,#2
- add w23,w23,w15 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w6,w6,w1,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w7,w7,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w5,w5,w14,ror#19
- eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w24,ror#22 // Sigma0(a)
- eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
- add w0,w0,w9
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w0,w0,w6
- add w23,w23,w17 // h+=Sigma0(a)
- add w0,w0,w5
- ldr w5,[sp,#8]
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w7,w2,#7
- and w17,w20,w27
- ror w6,w15,#17
- bic w28,w21,w27
- ror w8,w23,#2
- add w22,w22,w0 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w7,w7,w2,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w8,w8,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w6,w6,w15,ror#19
- eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w23,ror#22 // Sigma0(a)
- eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
- add w1,w1,w10
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w1,w1,w7
- add w22,w22,w17 // h+=Sigma0(a)
- add w1,w1,w6
- ldr w6,[sp,#12]
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w8,w3,#7
- and w17,w27,w26
- ror w7,w0,#17
- bic w19,w20,w26
- ror w9,w22,#2
- add w21,w21,w1 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w8,w8,w3,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w9,w9,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w7,w7,w0,ror#19
- eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w22,ror#22 // Sigma0(a)
- eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
- add w2,w2,w11
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w2,w2,w8
- add w21,w21,w17 // h+=Sigma0(a)
- add w2,w2,w7
- ldr w7,[sp,#0]
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
- cbnz w19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#260 // rewind
-
- ldp w3,w4,[x0]
- ldp w5,w6,[x0,#2*4]
- add x1,x1,#14*4 // advance input pointer
- ldp w7,w8,[x0,#4*4]
- add w20,w20,w3
- ldp w9,w10,[x0,#6*4]
- add w21,w21,w4
- add w22,w22,w5
- add w23,w23,w6
- stp w20,w21,[x0]
- add w24,w24,w7
- add w25,w25,w8
- stp w22,w23,[x0,#2*4]
- add w26,w26,w9
- add w27,w27,w10
- cmp x1,x2
- stp w24,w25,[x0,#4*4]
- stp w26,w27,[x0,#6*4]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*4
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha256_block_data_order,.-sha256_block_data_order
-
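For orientation: the unrolled stanzas of sha256_block_data_order above are the standard SHA-256 compression rounds. Each one computes Ch(e,f,g), Maj(a,b,c), Sigma0(a) and Sigma1(e) exactly as the inline comments say, with Maj folded into the register-recycling trick ((b^c)&(a^b))^b and the rotates composed from ror#6/#11/#25 and ror#2/#13/#22 pairs. A minimal C sketch of one round follows; the names are illustrative, not OpenSSL's API.

/* One SHA-256 round, matching the Ch/Maj/Sigma comments in the
   assembly above.  Illustrative sketch only. */
#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

static void sha256_round(uint32_t s[8], uint32_t Ki, uint32_t Xi)
{
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

    uint32_t Sigma1 = ror32(e,6) ^ ror32(e,11) ^ ror32(e,25);
    uint32_t Ch     = (e & f) ^ (~e & g);          /* the and/bic/orr trio  */
    uint32_t Sigma0 = ror32(a,2) ^ ror32(a,13) ^ ror32(a,22);
    uint32_t Maj    = (a & b) ^ (a & c) ^ (b & c); /* via ((b^c)&(a^b))^b   */

    uint32_t T1 = h + Sigma1 + Ch + Ki + Xi;       /* h+=K[i], h+=X[i], ... */
    uint32_t T2 = Sigma0 + Maj;

    s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;   /* d+=h                  */
    s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;  /* h+=Sigma0+Maj         */
}
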
-.align 6
-.type K256,%object
-K256:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
- .long 0 //terminator
-.size K256,.-K256
-.align 3
-.LOPENSSL_armcap_P:
- .quad OPENSSL_armcap_P-.
-.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-.type sha256_block_armv8,%function
-.align 6
-sha256_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- ld1 {v0.4s,v1.4s},[x0]
- adr x3,K256
-
-.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
- sub x2,x2,#1
- ld1 {v16.4s},[x3],#16
- rev32 v4.16b,v4.16b
- rev32 v5.16b,v5.16b
- rev32 v6.16b,v6.16b
- rev32 v7.16b,v7.16b
- orr v18.16b,v0.16b,v0.16b // offload
- orr v19.16b,v1.16b,v1.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- ld1 {v17.4s},[x3]
- add v16.4s,v16.4s,v6.4s
- sub x3,x3,#64*4-16 // rewind
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- add v17.4s,v17.4s,v7.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- add v0.4s,v0.4s,v18.4s
- add v1.4s,v1.4s,v19.4s
-
- cbnz x2,.Loop_hw
-
- st1 {v0.4s,v1.4s},[x0]
-
- ldr x29,[sp],#16
- ret
-.size sha256_block_armv8,.-sha256_block_armv8
-.comm OPENSSL_armcap_P,4,4
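The .inst 0x5e... words in sha256_block_armv8 above are hand-encoded ARMv8 Crypto Extensions instructions (sha256h, sha256h2, sha256su0, sha256su1), emitted as raw opcodes so the file still assembles with toolchains that predate those mnemonics; the orr vN,v0,v0 before each pair merely saves the abcd half, which sha256h overwrites. In ACLE C intrinsics (built with something like -march=armv8-a+crypto) one such quad round looks roughly like the sketch below; the helper name and argument layout are illustrative reconstructions, not the generated code.

/* Four SHA-256 rounds plus one message-schedule step of the hardware
   path, mirroring the add/sha256su0/sha256h/sha256h2/sha256su1 pattern
   above.  Sketch only. */
#include <arm_neon.h>

static void quad_round(uint32x4_t *abcd, uint32x4_t *efgh, uint32x4_t k,
                       uint32x4_t *w0, uint32x4_t w1,
                       uint32x4_t w2, uint32x4_t w3)
{
    uint32x4_t wk    = vaddq_u32(k, *w0);        /* add vK.4s,vK.4s,vW.4s */
    uint32x4_t abcd0 = *abcd;                    /* orr v2.16b,v0.16b,... */
    *w0   = vsha256su0q_u32(*w0, w1);            /* sha256su0             */
    *abcd = vsha256hq_u32(*abcd, *efgh, wk);     /* sha256h               */
    *efgh = vsha256h2q_u32(*efgh, abcd0, wk);    /* sha256h2              */
    *w0   = vsha256su1q_u32(*w0, w2, w3);        /* sha256su1             */
}
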
diff --git a/app/openssl/crypto/sha/asm/sha256-mips.S b/app/openssl/crypto/sha/asm/sha256-mips.S
deleted file mode 100644
index 2bd728e9..00000000
--- a/app/openssl/crypto/sha/asm/sha256-mips.S
+++ /dev/null
@@ -1,1998 +0,0 @@
-#ifdef OPENSSL_FIPSCANISTER
-# include <openssl/fipssyms.h>
-#endif
-
-.text
-.set noat
-#if !defined(__vxworks) || defined(__pic__)
-.option pic2
-#endif
-
-.align 5
-.globl sha256_block_data_order
-.ent sha256_block_data_order
-sha256_block_data_order:
- .frame $29,128,$31
- .mask 3237937152,-4
- .set noreorder
- .cpload $25
- sub $29,128
- sw $31,128-1*4($29)
- sw $30,128-2*4($29)
- sw $23,128-3*4($29)
- sw $22,128-4*4($29)
- sw $21,128-5*4($29)
- sw $20,128-6*4($29)
- sw $19,128-7*4($29)
- sw $18,128-8*4($29)
- sw $17,128-9*4($29)
- sw $16,128-10*4($29)
- sll $23,$6,6
- .set reorder
- la $6,K256 # PIC-ified 'load address'
-
- lw $1,0*4($4) # load context
- lw $2,1*4($4)
- lw $3,2*4($4)
- lw $7,3*4($4)
- lw $24,4*4($4)
- lw $25,5*4($4)
- lw $30,6*4($4)
- lw $31,7*4($4)
-
- add $23,$5 # pointer to the end of input
- sw $23,16*4($29)
- b .Loop
-
-.align 5
-.Loop:
- lwl $8,3($5)
- lwr $8,0($5)
- lwl $9,7($5)
- lwr $9,4($5)
- srl $13,$8,24 # byte swap(0)
- srl $14,$8,8
- andi $15,$8,0xFF00
- sll $8,$8,24
- andi $14,0xFF00
- sll $15,$15,8
- or $8,$13
- or $14,$15
- or $8,$14
- addu $12,$8,$31 # 0
- srl $31,$24,6
- xor $15,$25,$30
- sll $14,$24,7
- and $15,$24
- srl $13,$24,11
- xor $31,$14
- sll $14,$24,21
- xor $31,$13
- srl $13,$24,25
- xor $31,$14
- sll $14,$24,26
- xor $31,$13
- xor $15,$30 # Ch(e,f,g)
- xor $13,$14,$31 # Sigma1(e)
-
- srl $31,$1,2
- addu $12,$15
- lw $15,0($6) # K[0]
- sll $14,$1,10
- addu $12,$13
- srl $13,$1,13
- xor $31,$14
- sll $14,$1,19
- xor $31,$13
- srl $13,$1,22
- xor $31,$14
- sll $14,$1,30
- xor $31,$13
- sw $8,0($29) # offload to ring buffer
- xor $31,$14 # Sigma0(a)
-
- or $13,$1,$2
- and $14,$1,$2
- and $13,$3
- or $14,$13 # Maj(a,b,c)
- addu $12,$15 # +=K[0]
- addu $31,$14
-
- addu $7,$12
- addu $31,$12
- lwl $10,11($5)
- lwr $10,8($5)
- srl $14,$9,24 # byte swap(1)
- srl $15,$9,8
- andi $16,$9,0xFF00
- sll $9,$9,24
- andi $15,0xFF00
- sll $16,$16,8
- or $9,$14
- or $15,$16
- or $9,$15
- addu $13,$9,$30 # 1
- srl $30,$7,6
- xor $16,$24,$25
- sll $15,$7,7
- and $16,$7
- srl $14,$7,11
- xor $30,$15
- sll $15,$7,21
- xor $30,$14
- srl $14,$7,25
- xor $30,$15
- sll $15,$7,26
- xor $30,$14
- xor $16,$25 # Ch(e,f,g)
- xor $14,$15,$30 # Sigma1(e)
-
- srl $30,$31,2
- addu $13,$16
- lw $16,4($6) # K[1]
- sll $15,$31,10
- addu $13,$14
- srl $14,$31,13
- xor $30,$15
- sll $15,$31,19
- xor $30,$14
- srl $14,$31,22
- xor $30,$15
- sll $15,$31,30
- xor $30,$14
- sw $9,4($29) # offload to ring buffer
- xor $30,$15 # Sigma0(a)
-
- or $14,$31,$1
- and $15,$31,$1
- and $14,$2
- or $15,$14 # Maj(a,b,c)
- addu $13,$16 # +=K[1]
- addu $30,$15
-
- addu $3,$13
- addu $30,$13
- lwl $11,15($5)
- lwr $11,12($5)
- srl $15,$10,24 # byte swap(2)
- srl $16,$10,8
- andi $17,$10,0xFF00
- sll $10,$10,24
- andi $16,0xFF00
- sll $17,$17,8
- or $10,$15
- or $16,$17
- or $10,$16
- addu $14,$10,$25 # 2
- srl $25,$3,6
- xor $17,$7,$24
- sll $16,$3,7
- and $17,$3
- srl $15,$3,11
- xor $25,$16
- sll $16,$3,21
- xor $25,$15
- srl $15,$3,25
- xor $25,$16
- sll $16,$3,26
- xor $25,$15
- xor $17,$24 # Ch(e,f,g)
- xor $15,$16,$25 # Sigma1(e)
-
- srl $25,$30,2
- addu $14,$17
- lw $17,8($6) # K[2]
- sll $16,$30,10
- addu $14,$15
- srl $15,$30,13
- xor $25,$16
- sll $16,$30,19
- xor $25,$15
- srl $15,$30,22
- xor $25,$16
- sll $16,$30,30
- xor $25,$15
- sw $10,8($29) # offload to ring buffer
- xor $25,$16 # Sigma0(a)
-
- or $15,$30,$31
- and $16,$30,$31
- and $15,$1
- or $16,$15 # Maj(a,b,c)
- addu $14,$17 # +=K[2]
- addu $25,$16
-
- addu $2,$14
- addu $25,$14
- lwl $12,19($5)
- lwr $12,16($5)
- srl $16,$11,24 # byte swap(3)
- srl $17,$11,8
- andi $18,$11,0xFF00
- sll $11,$11,24
- andi $17,0xFF00
- sll $18,$18,8
- or $11,$16
- or $17,$18
- or $11,$17
- addu $15,$11,$24 # 3
- srl $24,$2,6
- xor $18,$3,$7
- sll $17,$2,7
- and $18,$2
- srl $16,$2,11
- xor $24,$17
- sll $17,$2,21
- xor $24,$16
- srl $16,$2,25
- xor $24,$17
- sll $17,$2,26
- xor $24,$16
- xor $18,$7 # Ch(e,f,g)
- xor $16,$17,$24 # Sigma1(e)
-
- srl $24,$25,2
- addu $15,$18
- lw $18,12($6) # K[3]
- sll $17,$25,10
- addu $15,$16
- srl $16,$25,13
- xor $24,$17
- sll $17,$25,19
- xor $24,$16
- srl $16,$25,22
- xor $24,$17
- sll $17,$25,30
- xor $24,$16
- sw $11,12($29) # offload to ring buffer
- xor $24,$17 # Sigma0(a)
-
- or $16,$25,$30
- and $17,$25,$30
- and $16,$31
- or $17,$16 # Maj(a,b,c)
- addu $15,$18 # +=K[3]
- addu $24,$17
-
- addu $1,$15
- addu $24,$15
- lwl $13,23($5)
- lwr $13,20($5)
- srl $17,$12,24 # byte swap(4)
- srl $18,$12,8
- andi $19,$12,0xFF00
- sll $12,$12,24
- andi $18,0xFF00
- sll $19,$19,8
- or $12,$17
- or $18,$19
- or $12,$18
- addu $16,$12,$7 # 4
- srl $7,$1,6
- xor $19,$2,$3
- sll $18,$1,7
- and $19,$1
- srl $17,$1,11
- xor $7,$18
- sll $18,$1,21
- xor $7,$17
- srl $17,$1,25
- xor $7,$18
- sll $18,$1,26
- xor $7,$17
- xor $19,$3 # Ch(e,f,g)
- xor $17,$18,$7 # Sigma1(e)
-
- srl $7,$24,2
- addu $16,$19
- lw $19,16($6) # K[4]
- sll $18,$24,10
- addu $16,$17
- srl $17,$24,13
- xor $7,$18
- sll $18,$24,19
- xor $7,$17
- srl $17,$24,22
- xor $7,$18
- sll $18,$24,30
- xor $7,$17
- sw $12,16($29) # offload to ring buffer
- xor $7,$18 # Sigma0(a)
-
- or $17,$24,$25
- and $18,$24,$25
- and $17,$30
- or $18,$17 # Maj(a,b,c)
- addu $16,$19 # +=K[4]
- addu $7,$18
-
- addu $31,$16
- addu $7,$16
- lwl $14,27($5)
- lwr $14,24($5)
- srl $18,$13,24 # byte swap(5)
- srl $19,$13,8
- andi $20,$13,0xFF00
- sll $13,$13,24
- andi $19,0xFF00
- sll $20,$20,8
- or $13,$18
- or $19,$20
- or $13,$19
- addu $17,$13,$3 # 5
- srl $3,$31,6
- xor $20,$1,$2
- sll $19,$31,7
- and $20,$31
- srl $18,$31,11
- xor $3,$19
- sll $19,$31,21
- xor $3,$18
- srl $18,$31,25
- xor $3,$19
- sll $19,$31,26
- xor $3,$18
- xor $20,$2 # Ch(e,f,g)
- xor $18,$19,$3 # Sigma1(e)
-
- srl $3,$7,2
- addu $17,$20
- lw $20,20($6) # K[5]
- sll $19,$7,10
- addu $17,$18
- srl $18,$7,13
- xor $3,$19
- sll $19,$7,19
- xor $3,$18
- srl $18,$7,22
- xor $3,$19
- sll $19,$7,30
- xor $3,$18
- sw $13,20($29) # offload to ring buffer
- xor $3,$19 # Sigma0(a)
-
- or $18,$7,$24
- and $19,$7,$24
- and $18,$25
- or $19,$18 # Maj(a,b,c)
- addu $17,$20 # +=K[5]
- addu $3,$19
-
- addu $30,$17
- addu $3,$17
- lwl $15,31($5)
- lwr $15,28($5)
- srl $19,$14,24 # byte swap(6)
- srl $20,$14,8
- andi $21,$14,0xFF00
- sll $14,$14,24
- andi $20,0xFF00
- sll $21,$21,8
- or $14,$19
- or $20,$21
- or $14,$20
- addu $18,$14,$2 # 6
- srl $2,$30,6
- xor $21,$31,$1
- sll $20,$30,7
- and $21,$30
- srl $19,$30,11
- xor $2,$20
- sll $20,$30,21
- xor $2,$19
- srl $19,$30,25
- xor $2,$20
- sll $20,$30,26
- xor $2,$19
- xor $21,$1 # Ch(e,f,g)
- xor $19,$20,$2 # Sigma1(e)
-
- srl $2,$3,2
- addu $18,$21
- lw $21,24($6) # K[6]
- sll $20,$3,10
- addu $18,$19
- srl $19,$3,13
- xor $2,$20
- sll $20,$3,19
- xor $2,$19
- srl $19,$3,22
- xor $2,$20
- sll $20,$3,30
- xor $2,$19
- sw $14,24($29) # offload to ring buffer
- xor $2,$20 # Sigma0(a)
-
- or $19,$3,$7
- and $20,$3,$7
- and $19,$24
- or $20,$19 # Maj(a,b,c)
- addu $18,$21 # +=K[6]
- addu $2,$20
-
- addu $25,$18
- addu $2,$18
- lwl $16,35($5)
- lwr $16,32($5)
- srl $20,$15,24 # byte swap(7)
- srl $21,$15,8
- andi $22,$15,0xFF00
- sll $15,$15,24
- andi $21,0xFF00
- sll $22,$22,8
- or $15,$20
- or $21,$22
- or $15,$21
- addu $19,$15,$1 # 7
- srl $1,$25,6
- xor $22,$30,$31
- sll $21,$25,7
- and $22,$25
- srl $20,$25,11
- xor $1,$21
- sll $21,$25,21
- xor $1,$20
- srl $20,$25,25
- xor $1,$21
- sll $21,$25,26
- xor $1,$20
- xor $22,$31 # Ch(e,f,g)
- xor $20,$21,$1 # Sigma1(e)
-
- srl $1,$2,2
- addu $19,$22
- lw $22,28($6) # K[7]
- sll $21,$2,10
- addu $19,$20
- srl $20,$2,13
- xor $1,$21
- sll $21,$2,19
- xor $1,$20
- srl $20,$2,22
- xor $1,$21
- sll $21,$2,30
- xor $1,$20
- sw $15,28($29) # offload to ring buffer
- xor $1,$21 # Sigma0(a)
-
- or $20,$2,$3
- and $21,$2,$3
- and $20,$7
- or $21,$20 # Maj(a,b,c)
- addu $19,$22 # +=K[7]
- addu $1,$21
-
- addu $24,$19
- addu $1,$19
- lwl $17,39($5)
- lwr $17,36($5)
- srl $21,$16,24 # byte swap(8)
- srl $22,$16,8
- andi $23,$16,0xFF00
- sll $16,$16,24
- andi $22,0xFF00
- sll $23,$23,8
- or $16,$21
- or $22,$23
- or $16,$22
- addu $20,$16,$31 # 8
- srl $31,$24,6
- xor $23,$25,$30
- sll $22,$24,7
- and $23,$24
- srl $21,$24,11
- xor $31,$22
- sll $22,$24,21
- xor $31,$21
- srl $21,$24,25
- xor $31,$22
- sll $22,$24,26
- xor $31,$21
- xor $23,$30 # Ch(e,f,g)
- xor $21,$22,$31 # Sigma1(e)
-
- srl $31,$1,2
- addu $20,$23
- lw $23,32($6) # K[8]
- sll $22,$1,10
- addu $20,$21
- srl $21,$1,13
- xor $31,$22
- sll $22,$1,19
- xor $31,$21
- srl $21,$1,22
- xor $31,$22
- sll $22,$1,30
- xor $31,$21
- sw $16,32($29) # offload to ring buffer
- xor $31,$22 # Sigma0(a)
-
- or $21,$1,$2
- and $22,$1,$2
- and $21,$3
- or $22,$21 # Maj(a,b,c)
- addu $20,$23 # +=K[8]
- addu $31,$22
-
- addu $7,$20
- addu $31,$20
- lwl $18,43($5)
- lwr $18,40($5)
- srl $22,$17,24 # byte swap(9)
- srl $23,$17,8
- andi $8,$17,0xFF00
- sll $17,$17,24
- andi $23,0xFF00
- sll $8,$8,8
- or $17,$22
- or $23,$8
- or $17,$23
- addu $21,$17,$30 # 9
- srl $30,$7,6
- xor $8,$24,$25
- sll $23,$7,7
- and $8,$7
- srl $22,$7,11
- xor $30,$23
- sll $23,$7,21
- xor $30,$22
- srl $22,$7,25
- xor $30,$23
- sll $23,$7,26
- xor $30,$22
- xor $8,$25 # Ch(e,f,g)
- xor $22,$23,$30 # Sigma1(e)
-
- srl $30,$31,2
- addu $21,$8
- lw $8,36($6) # K[9]
- sll $23,$31,10
- addu $21,$22
- srl $22,$31,13
- xor $30,$23
- sll $23,$31,19
- xor $30,$22
- srl $22,$31,22
- xor $30,$23
- sll $23,$31,30
- xor $30,$22
- sw $17,36($29) # offload to ring buffer
- xor $30,$23 # Sigma0(a)
-
- or $22,$31,$1
- and $23,$31,$1
- and $22,$2
- or $23,$22 # Maj(a,b,c)
- addu $21,$8 # +=K[9]
- addu $30,$23
-
- addu $3,$21
- addu $30,$21
- lwl $19,47($5)
- lwr $19,44($5)
- srl $23,$18,24 # byte swap(10)
- srl $8,$18,8
- andi $9,$18,0xFF00
- sll $18,$18,24
- andi $8,0xFF00
- sll $9,$9,8
- or $18,$23
- or $8,$9
- or $18,$8
- addu $22,$18,$25 # 10
- srl $25,$3,6
- xor $9,$7,$24
- sll $8,$3,7
- and $9,$3
- srl $23,$3,11
- xor $25,$8
- sll $8,$3,21
- xor $25,$23
- srl $23,$3,25
- xor $25,$8
- sll $8,$3,26
- xor $25,$23
- xor $9,$24 # Ch(e,f,g)
- xor $23,$8,$25 # Sigma1(e)
-
- srl $25,$30,2
- addu $22,$9
- lw $9,40($6) # K[10]
- sll $8,$30,10
- addu $22,$23
- srl $23,$30,13
- xor $25,$8
- sll $8,$30,19
- xor $25,$23
- srl $23,$30,22
- xor $25,$8
- sll $8,$30,30
- xor $25,$23
- sw $18,40($29) # offload to ring buffer
- xor $25,$8 # Sigma0(a)
-
- or $23,$30,$31
- and $8,$30,$31
- and $23,$1
- or $8,$23 # Maj(a,b,c)
- addu $22,$9 # +=K[10]
- addu $25,$8
-
- addu $2,$22
- addu $25,$22
- lwl $20,51($5)
- lwr $20,48($5)
- srl $8,$19,24 # byte swap(11)
- srl $9,$19,8
- andi $10,$19,0xFF00
- sll $19,$19,24
- andi $9,0xFF00
- sll $10,$10,8
- or $19,$8
- or $9,$10
- or $19,$9
- addu $23,$19,$24 # 11
- srl $24,$2,6
- xor $10,$3,$7
- sll $9,$2,7
- and $10,$2
- srl $8,$2,11
- xor $24,$9
- sll $9,$2,21
- xor $24,$8
- srl $8,$2,25
- xor $24,$9
- sll $9,$2,26
- xor $24,$8
- xor $10,$7 # Ch(e,f,g)
- xor $8,$9,$24 # Sigma1(e)
-
- srl $24,$25,2
- addu $23,$10
- lw $10,44($6) # K[11]
- sll $9,$25,10
- addu $23,$8
- srl $8,$25,13
- xor $24,$9
- sll $9,$25,19
- xor $24,$8
- srl $8,$25,22
- xor $24,$9
- sll $9,$25,30
- xor $24,$8
- sw $19,44($29) # offload to ring buffer
- xor $24,$9 # Sigma0(a)
-
- or $8,$25,$30
- and $9,$25,$30
- and $8,$31
- or $9,$8 # Maj(a,b,c)
- addu $23,$10 # +=K[11]
- addu $24,$9
-
- addu $1,$23
- addu $24,$23
- lwl $21,55($5)
- lwr $21,52($5)
- srl $9,$20,24 # byte swap(12)
- srl $10,$20,8
- andi $11,$20,0xFF00
- sll $20,$20,24
- andi $10,0xFF00
- sll $11,$11,8
- or $20,$9
- or $10,$11
- or $20,$10
- addu $8,$20,$7 # 12
- srl $7,$1,6
- xor $11,$2,$3
- sll $10,$1,7
- and $11,$1
- srl $9,$1,11
- xor $7,$10
- sll $10,$1,21
- xor $7,$9
- srl $9,$1,25
- xor $7,$10
- sll $10,$1,26
- xor $7,$9
- xor $11,$3 # Ch(e,f,g)
- xor $9,$10,$7 # Sigma1(e)
-
- srl $7,$24,2
- addu $8,$11
- lw $11,48($6) # K[12]
- sll $10,$24,10
- addu $8,$9
- srl $9,$24,13
- xor $7,$10
- sll $10,$24,19
- xor $7,$9
- srl $9,$24,22
- xor $7,$10
- sll $10,$24,30
- xor $7,$9
- sw $20,48($29) # offload to ring buffer
- xor $7,$10 # Sigma0(a)
-
- or $9,$24,$25
- and $10,$24,$25
- and $9,$30
- or $10,$9 # Maj(a,b,c)
- addu $8,$11 # +=K[12]
- addu $7,$10
-
- addu $31,$8
- addu $7,$8
- lwl $22,59($5)
- lwr $22,56($5)
- srl $10,$21,24 # byte swap(13)
- srl $11,$21,8
- andi $12,$21,0xFF00
- sll $21,$21,24
- andi $11,0xFF00
- sll $12,$12,8
- or $21,$10
- or $11,$12
- or $21,$11
- addu $9,$21,$3 # 13
- srl $3,$31,6
- xor $12,$1,$2
- sll $11,$31,7
- and $12,$31
- srl $10,$31,11
- xor $3,$11
- sll $11,$31,21
- xor $3,$10
- srl $10,$31,25
- xor $3,$11
- sll $11,$31,26
- xor $3,$10
- xor $12,$2 # Ch(e,f,g)
- xor $10,$11,$3 # Sigma1(e)
-
- srl $3,$7,2
- addu $9,$12
- lw $12,52($6) # K[13]
- sll $11,$7,10
- addu $9,$10
- srl $10,$7,13
- xor $3,$11
- sll $11,$7,19
- xor $3,$10
- srl $10,$7,22
- xor $3,$11
- sll $11,$7,30
- xor $3,$10
- sw $21,52($29) # offload to ring buffer
- xor $3,$11 # Sigma0(a)
-
- or $10,$7,$24
- and $11,$7,$24
- and $10,$25
- or $11,$10 # Maj(a,b,c)
- addu $9,$12 # +=K[13]
- addu $3,$11
-
- addu $30,$9
- addu $3,$9
- lw $8,0($29) # prefetch from ring buffer
- lwl $23,63($5)
- lwr $23,60($5)
- srl $11,$22,24 # byte swap(14)
- srl $12,$22,8
- andi $13,$22,0xFF00
- sll $22,$22,24
- andi $12,0xFF00
- sll $13,$13,8
- or $22,$11
- or $12,$13
- or $22,$12
- addu $10,$22,$2 # 14
- srl $2,$30,6
- xor $13,$31,$1
- sll $12,$30,7
- and $13,$30
- srl $11,$30,11
- xor $2,$12
- sll $12,$30,21
- xor $2,$11
- srl $11,$30,25
- xor $2,$12
- sll $12,$30,26
- xor $2,$11
- xor $13,$1 # Ch(e,f,g)
- xor $11,$12,$2 # Sigma1(e)
-
- srl $2,$3,2
- addu $10,$13
- lw $13,56($6) # K[14]
- sll $12,$3,10
- addu $10,$11
- srl $11,$3,13
- xor $2,$12
- sll $12,$3,19
- xor $2,$11
- srl $11,$3,22
- xor $2,$12
- sll $12,$3,30
- xor $2,$11
- sw $22,56($29) # offload to ring buffer
- xor $2,$12 # Sigma0(a)
-
- or $11,$3,$7
- and $12,$3,$7
- and $11,$24
- or $12,$11 # Maj(a,b,c)
- addu $10,$13 # +=K[14]
- addu $2,$12
-
- addu $25,$10
- addu $2,$10
- lw $9,4($29) # prefetch from ring buffer
- srl $12,$23,24 # byte swap(15)
- srl $13,$23,8
- andi $14,$23,0xFF00
- sll $23,$23,24
- andi $13,0xFF00
- sll $14,$14,8
- or $23,$12
- or $13,$14
- or $23,$13
- addu $11,$23,$1 # 15
- srl $1,$25,6
- xor $14,$30,$31
- sll $13,$25,7
- and $14,$25
- srl $12,$25,11
- xor $1,$13
- sll $13,$25,21
- xor $1,$12
- srl $12,$25,25
- xor $1,$13
- sll $13,$25,26
- xor $1,$12
- xor $14,$31 # Ch(e,f,g)
- xor $12,$13,$1 # Sigma1(e)
-
- srl $1,$2,2
- addu $11,$14
- lw $14,60($6) # K[15]
- sll $13,$2,10
- addu $11,$12
- srl $12,$2,13
- xor $1,$13
- sll $13,$2,19
- xor $1,$12
- srl $12,$2,22
- xor $1,$13
- sll $13,$2,30
- xor $1,$12
- sw $23,60($29) # offload to ring buffer
- xor $1,$13 # Sigma0(a)
-
- or $12,$2,$3
- and $13,$2,$3
- and $12,$7
- or $13,$12 # Maj(a,b,c)
- addu $11,$14 # +=K[15]
- addu $1,$13
-
- addu $24,$11
- addu $1,$11
- lw $10,8($29) # prefetch from ring buffer
- b .L16_xx
-.align 4
-.L16_xx:
- srl $14,$9,3 # Xupdate(16)
- addu $8,$17 # +=X[i+9]
- sll $13,$9,14
- srl $12,$9,7
- xor $14,$13
- sll $13,11
- xor $14,$12
- srl $12,$9,18
- xor $14,$13
-
- srl $15,$22,10
- xor $14,$12 # sigma0(X[i+1])
- sll $13,$22,13
- addu $8,$14
- srl $12,$22,17
- xor $15,$13
- sll $13,2
- xor $15,$12
- srl $12,$22,19
- xor $15,$13
-
- xor $15,$12 # sigma1(X[i+14])
- addu $8,$15
- addu $12,$8,$31 # 16
- srl $31,$24,6
- xor $15,$25,$30
- sll $14,$24,7
- and $15,$24
- srl $13,$24,11
- xor $31,$14
- sll $14,$24,21
- xor $31,$13
- srl $13,$24,25
- xor $31,$14
- sll $14,$24,26
- xor $31,$13
- xor $15,$30 # Ch(e,f,g)
- xor $13,$14,$31 # Sigma1(e)
-
- srl $31,$1,2
- addu $12,$15
- lw $15,64($6) # K[16]
- sll $14,$1,10
- addu $12,$13
- srl $13,$1,13
- xor $31,$14
- sll $14,$1,19
- xor $31,$13
- srl $13,$1,22
- xor $31,$14
- sll $14,$1,30
- xor $31,$13
- sw $8,0($29) # offload to ring buffer
- xor $31,$14 # Sigma0(a)
-
- or $13,$1,$2
- and $14,$1,$2
- and $13,$3
- or $14,$13 # Maj(a,b,c)
- addu $12,$15 # +=K[16]
- addu $31,$14
-
- addu $7,$12
- addu $31,$12
- lw $11,12($29) # prefetch from ring buffer
- srl $15,$10,3 # Xupdate(17)
- addu $9,$18 # +=X[i+9]
- sll $14,$10,14
- srl $13,$10,7
- xor $15,$14
- sll $14,11
- xor $15,$13
- srl $13,$10,18
- xor $15,$14
-
- srl $16,$23,10
- xor $15,$13 # sigma0(X[i+1])
- sll $14,$23,13
- addu $9,$15
- srl $13,$23,17
- xor $16,$14
- sll $14,2
- xor $16,$13
- srl $13,$23,19
- xor $16,$14
-
- xor $16,$13 # sigma1(X[i+14])
- addu $9,$16
- addu $13,$9,$30 # 17
- srl $30,$7,6
- xor $16,$24,$25
- sll $15,$7,7
- and $16,$7
- srl $14,$7,11
- xor $30,$15
- sll $15,$7,21
- xor $30,$14
- srl $14,$7,25
- xor $30,$15
- sll $15,$7,26
- xor $30,$14
- xor $16,$25 # Ch(e,f,g)
- xor $14,$15,$30 # Sigma1(e)
-
- srl $30,$31,2
- addu $13,$16
- lw $16,68($6) # K[17]
- sll $15,$31,10
- addu $13,$14
- srl $14,$31,13
- xor $30,$15
- sll $15,$31,19
- xor $30,$14
- srl $14,$31,22
- xor $30,$15
- sll $15,$31,30
- xor $30,$14
- sw $9,4($29) # offload to ring buffer
- xor $30,$15 # Sigma0(a)
-
- or $14,$31,$1
- and $15,$31,$1
- and $14,$2
- or $15,$14 # Maj(a,b,c)
- addu $13,$16 # +=K[17]
- addu $30,$15
-
- addu $3,$13
- addu $30,$13
- lw $12,16($29) # prefetch from ring buffer
- srl $16,$11,3 # Xupdate(18)
- addu $10,$19 # +=X[i+9]
- sll $15,$11,14
- srl $14,$11,7
- xor $16,$15
- sll $15,11
- xor $16,$14
- srl $14,$11,18
- xor $16,$15
-
- srl $17,$8,10
- xor $16,$14 # sigma0(X[i+1])
- sll $15,$8,13
- addu $10,$16
- srl $14,$8,17
- xor $17,$15
- sll $15,2
- xor $17,$14
- srl $14,$8,19
- xor $17,$15
-
- xor $17,$14 # sigma1(X[i+14])
- addu $10,$17
- addu $14,$10,$25 # 18
- srl $25,$3,6
- xor $17,$7,$24
- sll $16,$3,7
- and $17,$3
- srl $15,$3,11
- xor $25,$16
- sll $16,$3,21
- xor $25,$15
- srl $15,$3,25
- xor $25,$16
- sll $16,$3,26
- xor $25,$15
- xor $17,$24 # Ch(e,f,g)
- xor $15,$16,$25 # Sigma1(e)
-
- srl $25,$30,2
- addu $14,$17
- lw $17,72($6) # K[18]
- sll $16,$30,10
- addu $14,$15
- srl $15,$30,13
- xor $25,$16
- sll $16,$30,19
- xor $25,$15
- srl $15,$30,22
- xor $25,$16
- sll $16,$30,30
- xor $25,$15
- sw $10,8($29) # offload to ring buffer
- xor $25,$16 # Sigma0(a)
-
- or $15,$30,$31
- and $16,$30,$31
- and $15,$1
- or $16,$15 # Maj(a,b,c)
- addu $14,$17 # +=K[18]
- addu $25,$16
-
- addu $2,$14
- addu $25,$14
- lw $13,20($29) # prefetch from ring buffer
- srl $17,$12,3 # Xupdate(19)
- addu $11,$20 # +=X[i+9]
- sll $16,$12,14
- srl $15,$12,7
- xor $17,$16
- sll $16,11
- xor $17,$15
- srl $15,$12,18
- xor $17,$16
-
- srl $18,$9,10
- xor $17,$15 # sigma0(X[i+1])
- sll $16,$9,13
- addu $11,$17
- srl $15,$9,17
- xor $18,$16
- sll $16,2
- xor $18,$15
- srl $15,$9,19
- xor $18,$16
-
- xor $18,$15 # sigma1(X[i+14])
- addu $11,$18
- addu $15,$11,$24 # 19
- srl $24,$2,6
- xor $18,$3,$7
- sll $17,$2,7
- and $18,$2
- srl $16,$2,11
- xor $24,$17
- sll $17,$2,21
- xor $24,$16
- srl $16,$2,25
- xor $24,$17
- sll $17,$2,26
- xor $24,$16
- xor $18,$7 # Ch(e,f,g)
- xor $16,$17,$24 # Sigma1(e)
-
- srl $24,$25,2
- addu $15,$18
- lw $18,76($6) # K[19]
- sll $17,$25,10
- addu $15,$16
- srl $16,$25,13
- xor $24,$17
- sll $17,$25,19
- xor $24,$16
- srl $16,$25,22
- xor $24,$17
- sll $17,$25,30
- xor $24,$16
- sw $11,12($29) # offload to ring buffer
- xor $24,$17 # Sigma0(a)
-
- or $16,$25,$30
- and $17,$25,$30
- and $16,$31
- or $17,$16 # Maj(a,b,c)
- addu $15,$18 # +=K[19]
- addu $24,$17
-
- addu $1,$15
- addu $24,$15
- lw $14,24($29) # prefetch from ring buffer
- srl $18,$13,3 # Xupdate(20)
- addu $12,$21 # +=X[i+9]
- sll $17,$13,14
- srl $16,$13,7
- xor $18,$17
- sll $17,11
- xor $18,$16
- srl $16,$13,18
- xor $18,$17
-
- srl $19,$10,10
- xor $18,$16 # sigma0(X[i+1])
- sll $17,$10,13
- addu $12,$18
- srl $16,$10,17
- xor $19,$17
- sll $17,2
- xor $19,$16
- srl $16,$10,19
- xor $19,$17
-
- xor $19,$16 # sigma1(X[i+14])
- addu $12,$19
- addu $16,$12,$7 # 20
- srl $7,$1,6
- xor $19,$2,$3
- sll $18,$1,7
- and $19,$1
- srl $17,$1,11
- xor $7,$18
- sll $18,$1,21
- xor $7,$17
- srl $17,$1,25
- xor $7,$18
- sll $18,$1,26
- xor $7,$17
- xor $19,$3 # Ch(e,f,g)
- xor $17,$18,$7 # Sigma1(e)
-
- srl $7,$24,2
- addu $16,$19
- lw $19,80($6) # K[20]
- sll $18,$24,10
- addu $16,$17
- srl $17,$24,13
- xor $7,$18
- sll $18,$24,19
- xor $7,$17
- srl $17,$24,22
- xor $7,$18
- sll $18,$24,30
- xor $7,$17
- sw $12,16($29) # offload to ring buffer
- xor $7,$18 # Sigma0(a)
-
- or $17,$24,$25
- and $18,$24,$25
- and $17,$30
- or $18,$17 # Maj(a,b,c)
- addu $16,$19 # +=K[20]
- addu $7,$18
-
- addu $31,$16
- addu $7,$16
- lw $15,28($29) # prefetch from ring buffer
- srl $19,$14,3 # Xupdate(21)
- addu $13,$22 # +=X[i+9]
- sll $18,$14,14
- srl $17,$14,7
- xor $19,$18
- sll $18,11
- xor $19,$17
- srl $17,$14,18
- xor $19,$18
-
- srl $20,$11,10
- xor $19,$17 # sigma0(X[i+1])
- sll $18,$11,13
- addu $13,$19
- srl $17,$11,17
- xor $20,$18
- sll $18,2
- xor $20,$17
- srl $17,$11,19
- xor $20,$18
-
- xor $20,$17 # sigma1(X[i+14])
- addu $13,$20
- addu $17,$13,$3 # 21
- srl $3,$31,6
- xor $20,$1,$2
- sll $19,$31,7
- and $20,$31
- srl $18,$31,11
- xor $3,$19
- sll $19,$31,21
- xor $3,$18
- srl $18,$31,25
- xor $3,$19
- sll $19,$31,26
- xor $3,$18
- xor $20,$2 # Ch(e,f,g)
- xor $18,$19,$3 # Sigma1(e)
-
- srl $3,$7,2
- addu $17,$20
- lw $20,84($6) # K[21]
- sll $19,$7,10
- addu $17,$18
- srl $18,$7,13
- xor $3,$19
- sll $19,$7,19
- xor $3,$18
- srl $18,$7,22
- xor $3,$19
- sll $19,$7,30
- xor $3,$18
- sw $13,20($29) # offload to ring buffer
- xor $3,$19 # Sigma0(a)
-
- or $18,$7,$24
- and $19,$7,$24
- and $18,$25
- or $19,$18 # Maj(a,b,c)
- addu $17,$20 # +=K[21]
- addu $3,$19
-
- addu $30,$17
- addu $3,$17
- lw $16,32($29) # prefetch from ring buffer
- srl $20,$15,3 # Xupdate(22)
- addu $14,$23 # +=X[i+9]
- sll $19,$15,14
- srl $18,$15,7
- xor $20,$19
- sll $19,11
- xor $20,$18
- srl $18,$15,18
- xor $20,$19
-
- srl $21,$12,10
- xor $20,$18 # sigma0(X[i+1])
- sll $19,$12,13
- addu $14,$20
- srl $18,$12,17
- xor $21,$19
- sll $19,2
- xor $21,$18
- srl $18,$12,19
- xor $21,$19
-
- xor $21,$18 # sigma1(X[i+14])
- addu $14,$21
- addu $18,$14,$2 # 22
- srl $2,$30,6
- xor $21,$31,$1
- sll $20,$30,7
- and $21,$30
- srl $19,$30,11
- xor $2,$20
- sll $20,$30,21
- xor $2,$19
- srl $19,$30,25
- xor $2,$20
- sll $20,$30,26
- xor $2,$19
- xor $21,$1 # Ch(e,f,g)
- xor $19,$20,$2 # Sigma1(e)
-
- srl $2,$3,2
- addu $18,$21
- lw $21,88($6) # K[22]
- sll $20,$3,10
- addu $18,$19
- srl $19,$3,13
- xor $2,$20
- sll $20,$3,19
- xor $2,$19
- srl $19,$3,22
- xor $2,$20
- sll $20,$3,30
- xor $2,$19
- sw $14,24($29) # offload to ring buffer
- xor $2,$20 # Sigma0(a)
-
- or $19,$3,$7
- and $20,$3,$7
- and $19,$24
- or $20,$19 # Maj(a,b,c)
- addu $18,$21 # +=K[22]
- addu $2,$20
-
- addu $25,$18
- addu $2,$18
- lw $17,36($29) # prefetch from ring buffer
- srl $21,$16,3 # Xupdate(23)
- addu $15,$8 # +=X[i+9]
- sll $20,$16,14
- srl $19,$16,7
- xor $21,$20
- sll $20,11
- xor $21,$19
- srl $19,$16,18
- xor $21,$20
-
- srl $22,$13,10
- xor $21,$19 # sigma0(X[i+1])
- sll $20,$13,13
- addu $15,$21
- srl $19,$13,17
- xor $22,$20
- sll $20,2
- xor $22,$19
- srl $19,$13,19
- xor $22,$20
-
- xor $22,$19 # sigma1(X[i+14])
- addu $15,$22
- addu $19,$15,$1 # 23
- srl $1,$25,6
- xor $22,$30,$31
- sll $21,$25,7
- and $22,$25
- srl $20,$25,11
- xor $1,$21
- sll $21,$25,21
- xor $1,$20
- srl $20,$25,25
- xor $1,$21
- sll $21,$25,26
- xor $1,$20
- xor $22,$31 # Ch(e,f,g)
- xor $20,$21,$1 # Sigma1(e)
-
- srl $1,$2,2
- addu $19,$22
- lw $22,92($6) # K[23]
- sll $21,$2,10
- addu $19,$20
- srl $20,$2,13
- xor $1,$21
- sll $21,$2,19
- xor $1,$20
- srl $20,$2,22
- xor $1,$21
- sll $21,$2,30
- xor $1,$20
- sw $15,28($29) # offload to ring buffer
- xor $1,$21 # Sigma0(a)
-
- or $20,$2,$3
- and $21,$2,$3
- and $20,$7
- or $21,$20 # Maj(a,b,c)
- addu $19,$22 # +=K[23]
- addu $1,$21
-
- addu $24,$19
- addu $1,$19
- lw $18,40($29) # prefetch from ring buffer
- srl $22,$17,3 # Xupdate(24)
- addu $16,$9 # +=X[i+9]
- sll $21,$17,14
- srl $20,$17,7
- xor $22,$21
- sll $21,11
- xor $22,$20
- srl $20,$17,18
- xor $22,$21
-
- srl $23,$14,10
- xor $22,$20 # sigma0(X[i+1])
- sll $21,$14,13
- addu $16,$22
- srl $20,$14,17
- xor $23,$21
- sll $21,2
- xor $23,$20
- srl $20,$14,19
- xor $23,$21
-
- xor $23,$20 # sigma1(X[i+14])
- addu $16,$23
- addu $20,$16,$31 # 24
- srl $31,$24,6
- xor $23,$25,$30
- sll $22,$24,7
- and $23,$24
- srl $21,$24,11
- xor $31,$22
- sll $22,$24,21
- xor $31,$21
- srl $21,$24,25
- xor $31,$22
- sll $22,$24,26
- xor $31,$21
- xor $23,$30 # Ch(e,f,g)
- xor $21,$22,$31 # Sigma1(e)
-
- srl $31,$1,2
- addu $20,$23
- lw $23,96($6) # K[24]
- sll $22,$1,10
- addu $20,$21
- srl $21,$1,13
- xor $31,$22
- sll $22,$1,19
- xor $31,$21
- srl $21,$1,22
- xor $31,$22
- sll $22,$1,30
- xor $31,$21
- sw $16,32($29) # offload to ring buffer
- xor $31,$22 # Sigma0(a)
-
- or $21,$1,$2
- and $22,$1,$2
- and $21,$3
- or $22,$21 # Maj(a,b,c)
- addu $20,$23 # +=K[24]
- addu $31,$22
-
- addu $7,$20
- addu $31,$20
- lw $19,44($29) # prefetch from ring buffer
- srl $23,$18,3 # Xupdate(25)
- addu $17,$10 # +=X[i+9]
- sll $22,$18,14
- srl $21,$18,7
- xor $23,$22
- sll $22,11
- xor $23,$21
- srl $21,$18,18
- xor $23,$22
-
- srl $8,$15,10
- xor $23,$21 # sigma0(X[i+1])
- sll $22,$15,13
- addu $17,$23
- srl $21,$15,17
- xor $8,$22
- sll $22,2
- xor $8,$21
- srl $21,$15,19
- xor $8,$22
-
- xor $8,$21 # sigma1(X[i+14])
- addu $17,$8
- addu $21,$17,$30 # 25
- srl $30,$7,6
- xor $8,$24,$25
- sll $23,$7,7
- and $8,$7
- srl $22,$7,11
- xor $30,$23
- sll $23,$7,21
- xor $30,$22
- srl $22,$7,25
- xor $30,$23
- sll $23,$7,26
- xor $30,$22
- xor $8,$25 # Ch(e,f,g)
- xor $22,$23,$30 # Sigma1(e)
-
- srl $30,$31,2
- addu $21,$8
- lw $8,100($6) # K[25]
- sll $23,$31,10
- addu $21,$22
- srl $22,$31,13
- xor $30,$23
- sll $23,$31,19
- xor $30,$22
- srl $22,$31,22
- xor $30,$23
- sll $23,$31,30
- xor $30,$22
- sw $17,36($29) # offload to ring buffer
- xor $30,$23 # Sigma0(a)
-
- or $22,$31,$1
- and $23,$31,$1
- and $22,$2
- or $23,$22 # Maj(a,b,c)
- addu $21,$8 # +=K[25]
- addu $30,$23
-
- addu $3,$21
- addu $30,$21
- lw $20,48($29) # prefetch from ring buffer
- srl $8,$19,3 # Xupdate(26)
- addu $18,$11 # +=X[i+9]
- sll $23,$19,14
- srl $22,$19,7
- xor $8,$23
- sll $23,11
- xor $8,$22
- srl $22,$19,18
- xor $8,$23
-
- srl $9,$16,10
- xor $8,$22 # sigma0(X[i+1])
- sll $23,$16,13
- addu $18,$8
- srl $22,$16,17
- xor $9,$23
- sll $23,2
- xor $9,$22
- srl $22,$16,19
- xor $9,$23
-
- xor $9,$22 # sigma1(X[i+14])
- addu $18,$9
- addu $22,$18,$25 # 26
- srl $25,$3,6
- xor $9,$7,$24
- sll $8,$3,7
- and $9,$3
- srl $23,$3,11
- xor $25,$8
- sll $8,$3,21
- xor $25,$23
- srl $23,$3,25
- xor $25,$8
- sll $8,$3,26
- xor $25,$23
- xor $9,$24 # Ch(e,f,g)
- xor $23,$8,$25 # Sigma1(e)
-
- srl $25,$30,2
- addu $22,$9
- lw $9,104($6) # K[26]
- sll $8,$30,10
- addu $22,$23
- srl $23,$30,13
- xor $25,$8
- sll $8,$30,19
- xor $25,$23
- srl $23,$30,22
- xor $25,$8
- sll $8,$30,30
- xor $25,$23
- sw $18,40($29) # offload to ring buffer
- xor $25,$8 # Sigma0(a)
-
- or $23,$30,$31
- and $8,$30,$31
- and $23,$1
- or $8,$23 # Maj(a,b,c)
- addu $22,$9 # +=K[26]
- addu $25,$8
-
- addu $2,$22
- addu $25,$22
- lw $21,52($29) # prefetch from ring buffer
- srl $9,$20,3 # Xupdate(27)
- addu $19,$12 # +=X[i+9]
- sll $8,$20,14
- srl $23,$20,7
- xor $9,$8
- sll $8,11
- xor $9,$23
- srl $23,$20,18
- xor $9,$8
-
- srl $10,$17,10
- xor $9,$23 # sigma0(X[i+1])
- sll $8,$17,13
- addu $19,$9
- srl $23,$17,17
- xor $10,$8
- sll $8,2
- xor $10,$23
- srl $23,$17,19
- xor $10,$8
-
- xor $10,$23 # sigma1(X[i+14])
- addu $19,$10
- addu $23,$19,$24 # 27
- srl $24,$2,6
- xor $10,$3,$7
- sll $9,$2,7
- and $10,$2
- srl $8,$2,11
- xor $24,$9
- sll $9,$2,21
- xor $24,$8
- srl $8,$2,25
- xor $24,$9
- sll $9,$2,26
- xor $24,$8
- xor $10,$7 # Ch(e,f,g)
- xor $8,$9,$24 # Sigma1(e)
-
- srl $24,$25,2
- addu $23,$10
- lw $10,108($6) # K[27]
- sll $9,$25,10
- addu $23,$8
- srl $8,$25,13
- xor $24,$9
- sll $9,$25,19
- xor $24,$8
- srl $8,$25,22
- xor $24,$9
- sll $9,$25,30
- xor $24,$8
- sw $19,44($29) # offload to ring buffer
- xor $24,$9 # Sigma0(a)
-
- or $8,$25,$30
- and $9,$25,$30
- and $8,$31
- or $9,$8 # Maj(a,b,c)
- addu $23,$10 # +=K[27]
- addu $24,$9
-
- addu $1,$23
- addu $24,$23
- lw $22,56($29) # prefetch from ring buffer
- srl $10,$21,3 # Xupdate(28)
- addu $20,$13 # +=X[i+9]
- sll $9,$21,14
- srl $8,$21,7
- xor $10,$9
- sll $9,11
- xor $10,$8
- srl $8,$21,18
- xor $10,$9
-
- srl $11,$18,10
- xor $10,$8 # sigma0(X[i+1])
- sll $9,$18,13
- addu $20,$10
- srl $8,$18,17
- xor $11,$9
- sll $9,2
- xor $11,$8
- srl $8,$18,19
- xor $11,$9
-
- xor $11,$8 # sigma1(X[i+14])
- addu $20,$11
- addu $8,$20,$7 # 28
- srl $7,$1,6
- xor $11,$2,$3
- sll $10,$1,7
- and $11,$1
- srl $9,$1,11
- xor $7,$10
- sll $10,$1,21
- xor $7,$9
- srl $9,$1,25
- xor $7,$10
- sll $10,$1,26
- xor $7,$9
- xor $11,$3 # Ch(e,f,g)
- xor $9,$10,$7 # Sigma1(e)
-
- srl $7,$24,2
- addu $8,$11
- lw $11,112($6) # K[28]
- sll $10,$24,10
- addu $8,$9
- srl $9,$24,13
- xor $7,$10
- sll $10,$24,19
- xor $7,$9
- srl $9,$24,22
- xor $7,$10
- sll $10,$24,30
- xor $7,$9
- sw $20,48($29) # offload to ring buffer
- xor $7,$10 # Sigma0(a)
-
- or $9,$24,$25
- and $10,$24,$25
- and $9,$30
- or $10,$9 # Maj(a,b,c)
- addu $8,$11 # +=K[28]
- addu $7,$10
-
- addu $31,$8
- addu $7,$8
- lw $23,60($29) # prefetch from ring buffer
- srl $11,$22,3 # Xupdate(29)
- addu $21,$14 # +=X[i+9]
- sll $10,$22,14
- srl $9,$22,7
- xor $11,$10
- sll $10,11
- xor $11,$9
- srl $9,$22,18
- xor $11,$10
-
- srl $12,$19,10
- xor $11,$9 # sigma0(X[i+1])
- sll $10,$19,13
- addu $21,$11
- srl $9,$19,17
- xor $12,$10
- sll $10,2
- xor $12,$9
- srl $9,$19,19
- xor $12,$10
-
- xor $12,$9 # sigma1(X[i+14])
- addu $21,$12
- addu $9,$21,$3 # 29
- srl $3,$31,6
- xor $12,$1,$2
- sll $11,$31,7
- and $12,$31
- srl $10,$31,11
- xor $3,$11
- sll $11,$31,21
- xor $3,$10
- srl $10,$31,25
- xor $3,$11
- sll $11,$31,26
- xor $3,$10
- xor $12,$2 # Ch(e,f,g)
- xor $10,$11,$3 # Sigma1(e)
-
- srl $3,$7,2
- addu $9,$12
- lw $12,116($6) # K[29]
- sll $11,$7,10
- addu $9,$10
- srl $10,$7,13
- xor $3,$11
- sll $11,$7,19
- xor $3,$10
- srl $10,$7,22
- xor $3,$11
- sll $11,$7,30
- xor $3,$10
- sw $21,52($29) # offload to ring buffer
- xor $3,$11 # Sigma0(a)
-
- or $10,$7,$24
- and $11,$7,$24
- and $10,$25
- or $11,$10 # Maj(a,b,c)
- addu $9,$12 # +=K[29]
- addu $3,$11
-
- addu $30,$9
- addu $3,$9
- lw $8,0($29) # prefetch from ring buffer
- srl $12,$23,3 # Xupdate(30)
- addu $22,$15 # +=X[i+9]
- sll $11,$23,14
- srl $10,$23,7
- xor $12,$11
- sll $11,11
- xor $12,$10
- srl $10,$23,18
- xor $12,$11
-
- srl $13,$20,10
- xor $12,$10 # sigma0(X[i+1])
- sll $11,$20,13
- addu $22,$12
- srl $10,$20,17
- xor $13,$11
- sll $11,2
- xor $13,$10
- srl $10,$20,19
- xor $13,$11
-
- xor $13,$10 # sigma1(X[i+14])
- addu $22,$13
- addu $10,$22,$2 # 30
- srl $2,$30,6
- xor $13,$31,$1
- sll $12,$30,7
- and $13,$30
- srl $11,$30,11
- xor $2,$12
- sll $12,$30,21
- xor $2,$11
- srl $11,$30,25
- xor $2,$12
- sll $12,$30,26
- xor $2,$11
- xor $13,$1 # Ch(e,f,g)
- xor $11,$12,$2 # Sigma1(e)
-
- srl $2,$3,2
- addu $10,$13
- lw $13,120($6) # K[30]
- sll $12,$3,10
- addu $10,$11
- srl $11,$3,13
- xor $2,$12
- sll $12,$3,19
- xor $2,$11
- srl $11,$3,22
- xor $2,$12
- sll $12,$3,30
- xor $2,$11
- sw $22,56($29) # offload to ring buffer
- xor $2,$12 # Sigma0(a)
-
- or $11,$3,$7
- and $12,$3,$7
- and $11,$24
- or $12,$11 # Maj(a,b,c)
- addu $10,$13 # +=K[30]
- addu $2,$12
-
- addu $25,$10
- addu $2,$10
- lw $9,4($29) # prefetch from ring buffer
- srl $13,$8,3 # Xupdate(31)
- addu $23,$16 # +=X[i+9]
- sll $12,$8,14
- srl $11,$8,7
- xor $13,$12
- sll $12,11
- xor $13,$11
- srl $11,$8,18
- xor $13,$12
-
- srl $14,$21,10
- xor $13,$11 # sigma0(X[i+1])
- sll $12,$21,13
- addu $23,$13
- srl $11,$21,17
- xor $14,$12
- sll $12,2
- xor $14,$11
- srl $11,$21,19
- xor $14,$12
-
- xor $14,$11 # sigma1(X[i+14])
- addu $23,$14
- addu $11,$23,$1 # 31
- srl $1,$25,6
- xor $14,$30,$31
- sll $13,$25,7
- and $14,$25
- srl $12,$25,11
- xor $1,$13
- sll $13,$25,21
- xor $1,$12
- srl $12,$25,25
- xor $1,$13
- sll $13,$25,26
- xor $1,$12
- xor $14,$31 # Ch(e,f,g)
- xor $12,$13,$1 # Sigma1(e)
-
- srl $1,$2,2
- addu $11,$14
- lw $14,124($6) # K[31]
- sll $13,$2,10
- addu $11,$12
- srl $12,$2,13
- xor $1,$13
- sll $13,$2,19
- xor $1,$12
- srl $12,$2,22
- xor $1,$13
- sll $13,$2,30
- xor $1,$12
- sw $23,60($29) # offload to ring buffer
- xor $1,$13 # Sigma0(a)
-
- or $12,$2,$3
- and $13,$2,$3
- and $12,$7
- or $13,$12 # Maj(a,b,c)
- addu $11,$14 # +=K[31]
- addu $1,$13
-
- addu $24,$11
- addu $1,$11
- lw $10,8($29) # prefetch from ring buffer
- and $14,0xfff
- li $15,2290
- .set noreorder
- bne $14,$15,.L16_xx
- add $6,16*4 # Ktbl+=16
-
- lw $23,16*4($29) # restore pointer to the end of input
- lw $8,0*4($4)
- lw $9,1*4($4)
- lw $10,2*4($4)
- add $5,16*4
- lw $11,3*4($4)
- addu $1,$8
- lw $12,4*4($4)
- addu $2,$9
- lw $13,5*4($4)
- addu $3,$10
- lw $14,6*4($4)
- addu $7,$11
- lw $15,7*4($4)
- addu $24,$12
- sw $1,0*4($4)
- addu $25,$13
- sw $2,1*4($4)
- addu $30,$14
- sw $3,2*4($4)
- addu $31,$15
- sw $7,3*4($4)
- sw $24,4*4($4)
- sw $25,5*4($4)
- sw $30,6*4($4)
- sw $31,7*4($4)
-
- bne $5,$23,.Loop
- sub $6,192 # rewind $6
-
- lw $31,128-1*4($29)
- lw $30,128-2*4($29)
- lw $23,128-3*4($29)
- lw $22,128-4*4($29)
- lw $21,128-5*4($29)
- lw $20,128-6*4($29)
- lw $19,128-7*4($29)
- lw $18,128-8*4($29)
- lw $17,128-9*4($29)
- lw $16,128-10*4($29)
- jr $31
- add $29,128
-.end sha256_block_data_order
-
-.rdata
-.align 5
-K256:
- .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
- .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
- .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
- .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
- .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
- .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
- .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
- .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
- .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
- .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
- .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
- .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
- .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
- .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
- .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
- .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-.asciiz "SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"
-.align 5
-
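The Xupdate(16)..Xupdate(31) stanzas in the MIPS code above expand the 16-word message block in place (the "ring buffer" the comments mention), and the srl/sll/xor pairs synthesize the 32-bit rotates that the sigma functions need, since no rotate instruction is assumed (rotr only arrived with MIPS32R2); the "byte swap" stanzas likewise build big-endian loads from shifts and masks. The recurrence itself, in C for reference (a sketch, not the generated code):

/* SHA-256 message schedule as computed in place by the Xupdate stanzas
   above: X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14]), indices
   taken mod 16.  Sketch only. */
#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));   /* the srl/sll/xor pairs above */
}

static uint32_t xupdate(uint32_t X[16], unsigned i)
{
    uint32_t x1  = X[(i + 1)  & 15];
    uint32_t x14 = X[(i + 14) & 15];
    uint32_t s0  = ror32(x1, 7)   ^ ror32(x1, 18)  ^ (x1  >> 3);  /* sigma0 */
    uint32_t s1  = ror32(x14, 17) ^ ror32(x14, 19) ^ (x14 >> 10); /* sigma1 */
    X[i & 15] += s0 + X[(i + 9) & 15] + s1;
    return X[i & 15];
}
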
diff --git a/app/openssl/crypto/sha/asm/sha256-x86_64.S b/app/openssl/crypto/sha/asm/sha256-x86_64.S
deleted file mode 100644
index db5b898f..00000000
--- a/app/openssl/crypto/sha/asm/sha256-x86_64.S
+++ /dev/null
@@ -1,1778 +0,0 @@
-.text
-
-.globl sha256_block_data_order
-.type sha256_block_data_order,@function
-.align 16
-sha256_block_data_order:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- movq %rsp,%r11
- shlq $4,%rdx
- subq $64+32,%rsp
- leaq (%rsi,%rdx,4),%rdx
- andq $-64,%rsp
- movq %rdi,64+0(%rsp)
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
-.Lprologue:
-
- leaq K256(%rip),%rbp
-
- movl 0(%rdi),%eax
- movl 4(%rdi),%ebx
- movl 8(%rdi),%ecx
- movl 12(%rdi),%edx
- movl 16(%rdi),%r8d
- movl 20(%rdi),%r9d
- movl 24(%rdi),%r10d
- movl 28(%rdi),%r11d
- jmp .Lloop
-
-.align 16
-.Lloop:
- xorq %rdi,%rdi
- movl 0(%rsi),%r12d
- movl %r8d,%r13d
- movl %eax,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,0(%rsp)
-
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
-
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
-
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
-
- addl %r12d,%edx
- addl %r12d,%r11d
- leaq 1(%rdi),%rdi
- addl %r14d,%r11d
-
- movl 4(%rsi),%r12d
- movl %edx,%r13d
- movl %r11d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,4(%rsp)
-
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
-
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
-
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
-
- addl %r12d,%ecx
- addl %r12d,%r10d
- leaq 1(%rdi),%rdi
- addl %r14d,%r10d
-
- movl 8(%rsi),%r12d
- movl %ecx,%r13d
- movl %r10d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,8(%rsp)
-
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
-
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
-
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
-
- addl %r12d,%ebx
- addl %r12d,%r9d
- leaq 1(%rdi),%rdi
- addl %r14d,%r9d
-
- movl 12(%rsi),%r12d
- movl %ebx,%r13d
- movl %r9d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,12(%rsp)
-
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
-
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
-
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
-
- addl %r12d,%eax
- addl %r12d,%r8d
- leaq 1(%rdi),%rdi
- addl %r14d,%r8d
-
- movl 16(%rsi),%r12d
- movl %eax,%r13d
- movl %r8d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,16(%rsp)
-
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
-
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
-
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
-
- addl %r12d,%r11d
- addl %r12d,%edx
- leaq 1(%rdi),%rdi
- addl %r14d,%edx
-
- movl 20(%rsi),%r12d
- movl %r11d,%r13d
- movl %edx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,20(%rsp)
-
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
-
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
-
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
-
- addl %r12d,%r10d
- addl %r12d,%ecx
- leaq 1(%rdi),%rdi
- addl %r14d,%ecx
-
- movl 24(%rsi),%r12d
- movl %r10d,%r13d
- movl %ecx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,24(%rsp)
-
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
-
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
-
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
-
- addl %r12d,%r9d
- addl %r12d,%ebx
- leaq 1(%rdi),%rdi
- addl %r14d,%ebx
-
- movl 28(%rsi),%r12d
- movl %r9d,%r13d
- movl %ebx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,28(%rsp)
-
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
-
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
-
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
-
- addl %r12d,%r8d
- addl %r12d,%eax
- leaq 1(%rdi),%rdi
- addl %r14d,%eax
-
- movl 32(%rsi),%r12d
- movl %r8d,%r13d
- movl %eax,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,32(%rsp)
-
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
-
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
-
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
-
- addl %r12d,%edx
- addl %r12d,%r11d
- leaq 1(%rdi),%rdi
- addl %r14d,%r11d
-
- movl 36(%rsi),%r12d
- movl %edx,%r13d
- movl %r11d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,36(%rsp)
-
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
-
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
-
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
-
- addl %r12d,%ecx
- addl %r12d,%r10d
- leaq 1(%rdi),%rdi
- addl %r14d,%r10d
-
- movl 40(%rsi),%r12d
- movl %ecx,%r13d
- movl %r10d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,40(%rsp)
-
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
-
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
-
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
-
- addl %r12d,%ebx
- addl %r12d,%r9d
- leaq 1(%rdi),%rdi
- addl %r14d,%r9d
-
- movl 44(%rsi),%r12d
- movl %ebx,%r13d
- movl %r9d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,44(%rsp)
-
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
-
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
-
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
-
- addl %r12d,%eax
- addl %r12d,%r8d
- leaq 1(%rdi),%rdi
- addl %r14d,%r8d
-
- movl 48(%rsi),%r12d
- movl %eax,%r13d
- movl %r8d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,48(%rsp)
-
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
-
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
-
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
-
- addl %r12d,%r11d
- addl %r12d,%edx
- leaq 1(%rdi),%rdi
- addl %r14d,%edx
-
- movl 52(%rsi),%r12d
- movl %r11d,%r13d
- movl %edx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,52(%rsp)
-
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
-
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
-
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
-
- addl %r12d,%r10d
- addl %r12d,%ecx
- leaq 1(%rdi),%rdi
- addl %r14d,%ecx
-
- movl 56(%rsi),%r12d
- movl %r10d,%r13d
- movl %ecx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,56(%rsp)
-
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
-
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
-
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
-
- addl %r12d,%r9d
- addl %r12d,%ebx
- leaq 1(%rdi),%rdi
- addl %r14d,%ebx
-
- movl 60(%rsi),%r12d
- movl %r9d,%r13d
- movl %ebx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,60(%rsp)
-
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
-
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
-
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
-
- addl %r12d,%r8d
- addl %r12d,%eax
- leaq 1(%rdi),%rdi
- addl %r14d,%eax
-
- jmp .Lrounds_16_xx
-.align 16
-.Lrounds_16_xx:
- movl 4(%rsp),%r13d
- movl 56(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 36(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 0(%rsp),%r12d
- movl %r8d,%r13d
- addl %r14d,%r12d
- movl %eax,%r14d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,0(%rsp)
-
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
-
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
-
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
-
- addl %r12d,%edx
- addl %r12d,%r11d
- leaq 1(%rdi),%rdi
- addl %r14d,%r11d
-
- movl 8(%rsp),%r13d
- movl 60(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 40(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 4(%rsp),%r12d
- movl %edx,%r13d
- addl %r14d,%r12d
- movl %r11d,%r14d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,4(%rsp)
-
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
-
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
-
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
-
- addl %r12d,%ecx
- addl %r12d,%r10d
- leaq 1(%rdi),%rdi
- addl %r14d,%r10d
-
- movl 12(%rsp),%r13d
- movl 0(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 44(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 8(%rsp),%r12d
- movl %ecx,%r13d
- addl %r14d,%r12d
- movl %r10d,%r14d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,8(%rsp)
-
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
-
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
-
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
-
- addl %r12d,%ebx
- addl %r12d,%r9d
- leaq 1(%rdi),%rdi
- addl %r14d,%r9d
-
- movl 16(%rsp),%r13d
- movl 4(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 48(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 12(%rsp),%r12d
- movl %ebx,%r13d
- addl %r14d,%r12d
- movl %r9d,%r14d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,12(%rsp)
-
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
-
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
-
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
-
- addl %r12d,%eax
- addl %r12d,%r8d
- leaq 1(%rdi),%rdi
- addl %r14d,%r8d
-
- movl 20(%rsp),%r13d
- movl 8(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 52(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 16(%rsp),%r12d
- movl %eax,%r13d
- addl %r14d,%r12d
- movl %r8d,%r14d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,16(%rsp)
-
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
-
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
-
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
-
- addl %r12d,%r11d
- addl %r12d,%edx
- leaq 1(%rdi),%rdi
- addl %r14d,%edx
-
- movl 24(%rsp),%r13d
- movl 12(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 56(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 20(%rsp),%r12d
- movl %r11d,%r13d
- addl %r14d,%r12d
- movl %edx,%r14d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,20(%rsp)
-
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
-
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
-
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
-
- addl %r12d,%r10d
- addl %r12d,%ecx
- leaq 1(%rdi),%rdi
- addl %r14d,%ecx
-
- movl 28(%rsp),%r13d
- movl 16(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 60(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 24(%rsp),%r12d
- movl %r10d,%r13d
- addl %r14d,%r12d
- movl %ecx,%r14d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,24(%rsp)
-
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
-
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
-
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
-
- addl %r12d,%r9d
- addl %r12d,%ebx
- leaq 1(%rdi),%rdi
- addl %r14d,%ebx
-
- movl 32(%rsp),%r13d
- movl 20(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 0(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 28(%rsp),%r12d
- movl %r9d,%r13d
- addl %r14d,%r12d
- movl %ebx,%r14d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,28(%rsp)
-
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
-
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
-
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
-
- addl %r12d,%r8d
- addl %r12d,%eax
- leaq 1(%rdi),%rdi
- addl %r14d,%eax
-
- movl 36(%rsp),%r13d
- movl 24(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 4(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 32(%rsp),%r12d
- movl %r8d,%r13d
- addl %r14d,%r12d
- movl %eax,%r14d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,32(%rsp)
-
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
-
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
-
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
-
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
-
- addl %r12d,%edx
- addl %r12d,%r11d
- leaq 1(%rdi),%rdi
- addl %r14d,%r11d
-
- movl 40(%rsp),%r13d
- movl 28(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 8(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 36(%rsp),%r12d
- movl %edx,%r13d
- addl %r14d,%r12d
- movl %r11d,%r14d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,36(%rsp)
-
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
-
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
-
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
-
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
-
- addl %r12d,%ecx
- addl %r12d,%r10d
- leaq 1(%rdi),%rdi
- addl %r14d,%r10d
-
- movl 44(%rsp),%r13d
- movl 32(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 12(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 40(%rsp),%r12d
- movl %ecx,%r13d
- addl %r14d,%r12d
- movl %r10d,%r14d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,40(%rsp)
-
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
-
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
-
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
-
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
-
- addl %r12d,%ebx
- addl %r12d,%r9d
- leaq 1(%rdi),%rdi
- addl %r14d,%r9d
-
- movl 48(%rsp),%r13d
- movl 36(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 16(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 44(%rsp),%r12d
- movl %ebx,%r13d
- addl %r14d,%r12d
- movl %r9d,%r14d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,44(%rsp)
-
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
-
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
-
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
-
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
-
- addl %r12d,%eax
- addl %r12d,%r8d
- leaq 1(%rdi),%rdi
- addl %r14d,%r8d
-
- movl 52(%rsp),%r13d
- movl 40(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 20(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 48(%rsp),%r12d
- movl %eax,%r13d
- addl %r14d,%r12d
- movl %r8d,%r14d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,48(%rsp)
-
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
-
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
-
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
-
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
-
- addl %r12d,%r11d
- addl %r12d,%edx
- leaq 1(%rdi),%rdi
- addl %r14d,%edx
-
- movl 56(%rsp),%r13d
- movl 44(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 24(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 52(%rsp),%r12d
- movl %r11d,%r13d
- addl %r14d,%r12d
- movl %edx,%r14d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,52(%rsp)
-
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
-
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
-
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
-
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
-
- addl %r12d,%r10d
- addl %r12d,%ecx
- leaq 1(%rdi),%rdi
- addl %r14d,%ecx
-
- movl 60(%rsp),%r13d
- movl 48(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 28(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 56(%rsp),%r12d
- movl %r10d,%r13d
- addl %r14d,%r12d
- movl %ecx,%r14d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,56(%rsp)
-
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
-
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
-
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
-
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
-
- addl %r12d,%r9d
- addl %r12d,%ebx
- leaq 1(%rdi),%rdi
- addl %r14d,%ebx
-
- movl 0(%rsp),%r13d
- movl 52(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
-
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
-
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 32(%rsp),%r12d
-
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
-
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
-
- addl 60(%rsp),%r12d
- movl %r9d,%r13d
- addl %r14d,%r12d
- movl %ebx,%r14d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,60(%rsp)
-
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
-
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
-
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
-
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
-
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
-
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
-
- addl %r12d,%r8d
- addl %r12d,%eax
- leaq 1(%rdi),%rdi
- addl %r14d,%eax
-
- cmpq $64,%rdi
- jb .Lrounds_16_xx
-
- movq 64+0(%rsp),%rdi
- leaq 64(%rsi),%rsi
-
- addl 0(%rdi),%eax
- addl 4(%rdi),%ebx
- addl 8(%rdi),%ecx
- addl 12(%rdi),%edx
- addl 16(%rdi),%r8d
- addl 20(%rdi),%r9d
- addl 24(%rdi),%r10d
- addl 28(%rdi),%r11d
-
- cmpq 64+16(%rsp),%rsi
-
- movl %eax,0(%rdi)
- movl %ebx,4(%rdi)
- movl %ecx,8(%rdi)
- movl %edx,12(%rdi)
- movl %r8d,16(%rdi)
- movl %r9d,20(%rdi)
- movl %r10d,24(%rdi)
- movl %r11d,28(%rdi)
- jb .Lloop
-
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
-.Lepilogue:
- .byte 0xf3,0xc3
-.size sha256_block_data_order,.-sha256_block_data_order
-.align 64
-.type K256,@object
-K256:
-.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
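The unrolled x86_64 version above implements the same round function; its
outer structure is visible in the prologue and in the "cmpq $64,%rdi" and
"cmpq 64+16(%rsp),%rsi" tests. A hedged C sketch of that control flow only,
reusing sha256_round/sha256_sched from the sketch after the MIPS listing and
declaring the K256 table emitted above as extern:

#include <stdint.h>
#include <string.h>

extern const uint32_t K256[64];			/* the 64-entry table emitted above */

void sha256_block_loop(uint32_t st[8], const uint8_t *inp, size_t num)
{
	while (num--) {				/* cmpq 64+16(%rsp),%rsi; jb .Lloop  */
		uint32_t W[16], s[8];
		memcpy(s, st, sizeof(s));	/* movl 0(%rdi)..28(%rdi) -> a..h    */
		for (int i = 0; i < 16; i++)	/* big-endian loads, cf. bswapl      */
			W[i] = (uint32_t)inp[4*i] << 24 | (uint32_t)inp[4*i+1] << 16 |
			       (uint32_t)inp[4*i+2] << 8 | inp[4*i+3];
		for (int i = 0; i < 16; i++)	/* rounds 0..15, loads inlined       */
			sha256_round(s, W[i], K256[i]);
		for (int i = 16; i < 64; i++)	/* .Lrounds_16_xx until %rdi == 64   */
			sha256_round(s, sha256_sched(W, i), K256[i]);
		for (int i = 0; i < 8; i++)	/* addl 0(%rdi)..28(%rdi): fold back */
			st[i] += s[i];
		inp += 64;			/* leaq 64(%rsi),%rsi                */
	}
}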
diff --git a/app/openssl/crypto/sha/asm/sha512-586.S b/app/openssl/crypto/sha/asm/sha512-586.S
deleted file mode 100644
index 82c76c41..00000000
--- a/app/openssl/crypto/sha/asm/sha512-586.S
+++ /dev/null
@@ -1,836 +0,0 @@
-.file "sha512-586.s"
-.text
-.globl sha512_block_data_order
-.type sha512_block_data_order,@function
-.align 16
-sha512_block_data_order:
-.L_sha512_block_data_order_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%esi
- movl 24(%esp),%edi
- movl 28(%esp),%eax
- movl %esp,%ebx
- call .L000pic_point
-.L000pic_point:
- popl %ebp
- leal .L001K512-.L000pic_point(%ebp),%ebp
- subl $16,%esp
- andl $-64,%esp
- shll $7,%eax
- addl %edi,%eax
- movl %esi,(%esp)
- movl %edi,4(%esp)
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
- leal _GLOBAL_OFFSET_TABLE_+[.-.L001K512](%ebp),%edx
- movl OPENSSL_ia32cap_P@GOT(%edx),%edx
- btl $26,(%edx)
- jnc .L002loop_x86
- movq (%esi),%mm0
- movq 8(%esi),%mm1
- movq 16(%esi),%mm2
- movq 24(%esi),%mm3
- movq 32(%esi),%mm4
- movq 40(%esi),%mm5
- movq 48(%esi),%mm6
- movq 56(%esi),%mm7
- subl $80,%esp
-.align 16
-.L003loop_sse2:
- movq %mm1,8(%esp)
- movq %mm2,16(%esp)
- movq %mm3,24(%esp)
- movq %mm5,40(%esp)
- movq %mm6,48(%esp)
- movq %mm7,56(%esp)
- movl (%edi),%ecx
- movl 4(%edi),%edx
- addl $8,%edi
- bswap %ecx
- bswap %edx
- movl %ecx,76(%esp)
- movl %edx,72(%esp)
-.align 16
-.L00400_14_sse2:
- movl (%edi),%eax
- movl 4(%edi),%ebx
- addl $8,%edi
- bswap %eax
- bswap %ebx
- movl %eax,68(%esp)
- movl %ebx,64(%esp)
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
- movq 56(%esp),%mm7
- movq %mm4,%mm1
- movq %mm4,%mm2
- psrlq $14,%mm1
- movq %mm4,32(%esp)
- psllq $23,%mm2
- movq %mm1,%mm3
- psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
- pxor %mm1,%mm3
- psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
- pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
- pxor %mm6,%mm5
- movq 8(%esp),%mm1
- pand %mm4,%mm5
- movq 16(%esp),%mm2
- pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
- paddq %mm7,%mm3
- movq %mm0,%mm5
- movq %mm0,%mm6
- paddq 72(%esp),%mm3
- psrlq $28,%mm5
- paddq %mm3,%mm4
- psllq $25,%mm6
- movq %mm5,%mm7
- psrlq $6,%mm5
- pxor %mm6,%mm7
- psllq $5,%mm6
- pxor %mm5,%mm7
- psrlq $5,%mm5
- pxor %mm6,%mm7
- psllq $6,%mm6
- pxor %mm5,%mm7
- subl $8,%esp
- pxor %mm6,%mm7
- movq %mm0,%mm5
- por %mm2,%mm0
- pand %mm2,%mm5
- pand %mm1,%mm0
- por %mm0,%mm5
- paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $53,%dl
- jne .L00400_14_sse2
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
- movq 56(%esp),%mm7
- movq %mm4,%mm1
- movq %mm4,%mm2
- psrlq $14,%mm1
- movq %mm4,32(%esp)
- psllq $23,%mm2
- movq %mm1,%mm3
- psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
- pxor %mm1,%mm3
- psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
- pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
- pxor %mm6,%mm5
- movq 8(%esp),%mm1
- pand %mm4,%mm5
- movq 16(%esp),%mm2
- pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
- paddq %mm7,%mm3
- movq %mm0,%mm5
- movq %mm0,%mm6
- paddq 72(%esp),%mm3
- psrlq $28,%mm5
- paddq %mm3,%mm4
- psllq $25,%mm6
- movq %mm5,%mm7
- psrlq $6,%mm5
- pxor %mm6,%mm7
- psllq $5,%mm6
- pxor %mm5,%mm7
- psrlq $5,%mm5
- pxor %mm6,%mm7
- psllq $6,%mm6
- pxor %mm5,%mm7
- subl $8,%esp
- pxor %mm6,%mm7
- movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
- paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
-.align 16
-.L00516_79_sse2:
- movq %mm2,%mm1
- psrlq $1,%mm2
- movq %mm6,%mm7
- psrlq $6,%mm6
- movq %mm2,%mm3
- psrlq $6,%mm2
- movq %mm6,%mm5
- psrlq $13,%mm6
- pxor %mm2,%mm3
- psrlq $1,%mm2
- pxor %mm6,%mm5
- psrlq $42,%mm6
- pxor %mm2,%mm3
- movq 200(%esp),%mm2
- psllq $56,%mm1
- pxor %mm6,%mm5
- psllq $3,%mm7
- pxor %mm1,%mm3
- paddq 128(%esp),%mm2
- psllq $7,%mm1
- pxor %mm7,%mm5
- psllq $42,%mm7
- pxor %mm1,%mm3
- pxor %mm7,%mm5
- paddq %mm5,%mm3
- paddq %mm2,%mm3
- movq %mm3,72(%esp)
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
- movq 56(%esp),%mm7
- movq %mm4,%mm1
- movq %mm4,%mm2
- psrlq $14,%mm1
- movq %mm4,32(%esp)
- psllq $23,%mm2
- movq %mm1,%mm3
- psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
- pxor %mm1,%mm3
- psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
- pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
- pxor %mm6,%mm5
- movq 8(%esp),%mm1
- pand %mm4,%mm5
- movq 16(%esp),%mm2
- pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
- paddq %mm7,%mm3
- movq %mm0,%mm5
- movq %mm0,%mm6
- paddq 72(%esp),%mm3
- psrlq $28,%mm5
- paddq %mm3,%mm4
- psllq $25,%mm6
- movq %mm5,%mm7
- psrlq $6,%mm5
- pxor %mm6,%mm7
- psllq $5,%mm6
- pxor %mm5,%mm7
- psrlq $5,%mm5
- pxor %mm6,%mm7
- psllq $6,%mm6
- pxor %mm5,%mm7
- subl $8,%esp
- pxor %mm6,%mm7
- movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
- paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $23,%dl
- jne .L00516_79_sse2
- movq 8(%esp),%mm1
- movq 16(%esp),%mm2
- movq 24(%esp),%mm3
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
- movq 56(%esp),%mm7
- paddq (%esi),%mm0
- paddq 8(%esi),%mm1
- paddq 16(%esi),%mm2
- paddq 24(%esi),%mm3
- paddq 32(%esi),%mm4
- paddq 40(%esi),%mm5
- paddq 48(%esi),%mm6
- paddq 56(%esi),%mm7
- movq %mm0,(%esi)
- movq %mm1,8(%esi)
- movq %mm2,16(%esi)
- movq %mm3,24(%esi)
- movq %mm4,32(%esi)
- movq %mm5,40(%esi)
- movq %mm6,48(%esi)
- movq %mm7,56(%esi)
- addl $640,%esp
- subl $640,%ebp
- cmpl 88(%esp),%edi
- jb .L003loop_sse2
- emms
- movl 92(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 16
-.L002loop_x86:
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- movl 12(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 16(%edi),%eax
- movl 20(%edi),%ebx
- movl 24(%edi),%ecx
- movl 28(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 32(%edi),%eax
- movl 36(%edi),%ebx
- movl 40(%edi),%ecx
- movl 44(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 48(%edi),%eax
- movl 52(%edi),%ebx
- movl 56(%edi),%ecx
- movl 60(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 64(%edi),%eax
- movl 68(%edi),%ebx
- movl 72(%edi),%ecx
- movl 76(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 80(%edi),%eax
- movl 84(%edi),%ebx
- movl 88(%edi),%ecx
- movl 92(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 96(%edi),%eax
- movl 100(%edi),%ebx
- movl 104(%edi),%ecx
- movl 108(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- movl 112(%edi),%eax
- movl 116(%edi),%ebx
- movl 120(%edi),%ecx
- movl 124(%edi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- addl $128,%edi
- subl $72,%esp
- movl %edi,204(%esp)
- leal 8(%esp),%edi
- movl $16,%ecx
-.long 2784229001
-.align 16
-.L00600_15_x86:
- movl 40(%esp),%ecx
- movl 44(%esp),%edx
- movl %ecx,%esi
- shrl $9,%ecx
- movl %edx,%edi
- shrl $9,%edx
- movl %ecx,%ebx
- shll $14,%esi
- movl %edx,%eax
- shll $14,%edi
- xorl %esi,%ebx
- shrl $5,%ecx
- xorl %edi,%eax
- shrl $5,%edx
- xorl %ecx,%eax
- shll $4,%esi
- xorl %edx,%ebx
- shll $4,%edi
- xorl %esi,%ebx
- shrl $4,%ecx
- xorl %edi,%eax
- shrl $4,%edx
- xorl %ecx,%eax
- shll $5,%esi
- xorl %edx,%ebx
- shll $5,%edi
- xorl %esi,%eax
- xorl %edi,%ebx
- movl 48(%esp),%ecx
- movl 52(%esp),%edx
- movl 56(%esp),%esi
- movl 60(%esp),%edi
- addl 64(%esp),%eax
- adcl 68(%esp),%ebx
- xorl %esi,%ecx
- xorl %edi,%edx
- andl 40(%esp),%ecx
- andl 44(%esp),%edx
- addl 192(%esp),%eax
- adcl 196(%esp),%ebx
- xorl %esi,%ecx
- xorl %edi,%edx
- movl (%ebp),%esi
- movl 4(%ebp),%edi
- addl %ecx,%eax
- adcl %edx,%ebx
- movl 32(%esp),%ecx
- movl 36(%esp),%edx
- addl %esi,%eax
- adcl %edi,%ebx
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- addl %ecx,%eax
- adcl %edx,%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- movl %eax,32(%esp)
- movl %ebx,36(%esp)
- movl %ecx,%esi
- shrl $2,%ecx
- movl %edx,%edi
- shrl $2,%edx
- movl %ecx,%ebx
- shll $4,%esi
- movl %edx,%eax
- shll $4,%edi
- xorl %esi,%ebx
- shrl $5,%ecx
- xorl %edi,%eax
- shrl $5,%edx
- xorl %ecx,%ebx
- shll $21,%esi
- xorl %edx,%eax
- shll $21,%edi
- xorl %esi,%eax
- shrl $21,%ecx
- xorl %edi,%ebx
- shrl $21,%edx
- xorl %ecx,%eax
- shll $5,%esi
- xorl %edx,%ebx
- shll $5,%edi
- xorl %esi,%eax
- xorl %edi,%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- addl (%esp),%eax
- adcl 4(%esp),%ebx
- orl %esi,%ecx
- orl %edi,%edx
- andl 24(%esp),%ecx
- andl 28(%esp),%edx
- andl 8(%esp),%esi
- andl 12(%esp),%edi
- orl %esi,%ecx
- orl %edi,%edx
- addl %ecx,%eax
- adcl %edx,%ebx
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- movb (%ebp),%dl
- subl $8,%esp
- leal 8(%ebp),%ebp
- cmpb $148,%dl
- jne .L00600_15_x86
-.align 16
-.L00716_79_x86:
- movl 312(%esp),%ecx
- movl 316(%esp),%edx
- movl %ecx,%esi
- shrl $1,%ecx
- movl %edx,%edi
- shrl $1,%edx
- movl %ecx,%eax
- shll $24,%esi
- movl %edx,%ebx
- shll $24,%edi
- xorl %esi,%ebx
- shrl $6,%ecx
- xorl %edi,%eax
- shrl $6,%edx
- xorl %ecx,%eax
- shll $7,%esi
- xorl %edx,%ebx
- shll $1,%edi
- xorl %esi,%ebx
- shrl $1,%ecx
- xorl %edi,%eax
- shrl $1,%edx
- xorl %ecx,%eax
- shll $6,%edi
- xorl %edx,%ebx
- xorl %edi,%eax
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- movl 208(%esp),%ecx
- movl 212(%esp),%edx
- movl %ecx,%esi
- shrl $6,%ecx
- movl %edx,%edi
- shrl $6,%edx
- movl %ecx,%eax
- shll $3,%esi
- movl %edx,%ebx
- shll $3,%edi
- xorl %esi,%eax
- shrl $13,%ecx
- xorl %edi,%ebx
- shrl $13,%edx
- xorl %ecx,%eax
- shll $10,%esi
- xorl %edx,%ebx
- shll $10,%edi
- xorl %esi,%ebx
- shrl $10,%ecx
- xorl %edi,%eax
- shrl $10,%edx
- xorl %ecx,%ebx
- shll $13,%edi
- xorl %edx,%eax
- xorl %edi,%eax
- movl 320(%esp),%ecx
- movl 324(%esp),%edx
- addl (%esp),%eax
- adcl 4(%esp),%ebx
- movl 248(%esp),%esi
- movl 252(%esp),%edi
- addl %ecx,%eax
- adcl %edx,%ebx
- addl %esi,%eax
- adcl %edi,%ebx
- movl %eax,192(%esp)
- movl %ebx,196(%esp)
- movl 40(%esp),%ecx
- movl 44(%esp),%edx
- movl %ecx,%esi
- shrl $9,%ecx
- movl %edx,%edi
- shrl $9,%edx
- movl %ecx,%ebx
- shll $14,%esi
- movl %edx,%eax
- shll $14,%edi
- xorl %esi,%ebx
- shrl $5,%ecx
- xorl %edi,%eax
- shrl $5,%edx
- xorl %ecx,%eax
- shll $4,%esi
- xorl %edx,%ebx
- shll $4,%edi
- xorl %esi,%ebx
- shrl $4,%ecx
- xorl %edi,%eax
- shrl $4,%edx
- xorl %ecx,%eax
- shll $5,%esi
- xorl %edx,%ebx
- shll $5,%edi
- xorl %esi,%eax
- xorl %edi,%ebx
- movl 48(%esp),%ecx
- movl 52(%esp),%edx
- movl 56(%esp),%esi
- movl 60(%esp),%edi
- addl 64(%esp),%eax
- adcl 68(%esp),%ebx
- xorl %esi,%ecx
- xorl %edi,%edx
- andl 40(%esp),%ecx
- andl 44(%esp),%edx
- addl 192(%esp),%eax
- adcl 196(%esp),%ebx
- xorl %esi,%ecx
- xorl %edi,%edx
- movl (%ebp),%esi
- movl 4(%ebp),%edi
- addl %ecx,%eax
- adcl %edx,%ebx
- movl 32(%esp),%ecx
- movl 36(%esp),%edx
- addl %esi,%eax
- adcl %edi,%ebx
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- addl %ecx,%eax
- adcl %edx,%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- movl %eax,32(%esp)
- movl %ebx,36(%esp)
- movl %ecx,%esi
- shrl $2,%ecx
- movl %edx,%edi
- shrl $2,%edx
- movl %ecx,%ebx
- shll $4,%esi
- movl %edx,%eax
- shll $4,%edi
- xorl %esi,%ebx
- shrl $5,%ecx
- xorl %edi,%eax
- shrl $5,%edx
- xorl %ecx,%ebx
- shll $21,%esi
- xorl %edx,%eax
- shll $21,%edi
- xorl %esi,%eax
- shrl $21,%ecx
- xorl %edi,%ebx
- shrl $21,%edx
- xorl %ecx,%eax
- shll $5,%esi
- xorl %edx,%ebx
- shll $5,%edi
- xorl %esi,%eax
- xorl %edi,%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- addl (%esp),%eax
- adcl 4(%esp),%ebx
- orl %esi,%ecx
- orl %edi,%edx
- andl 24(%esp),%ecx
- andl 28(%esp),%edx
- andl 8(%esp),%esi
- andl 12(%esp),%edi
- orl %esi,%ecx
- orl %edi,%edx
- addl %ecx,%eax
- adcl %edx,%ebx
- movl %eax,(%esp)
- movl %ebx,4(%esp)
- movb (%ebp),%dl
- subl $8,%esp
- leal 8(%ebp),%ebp
- cmpb $23,%dl
- jne .L00716_79_x86
- movl 840(%esp),%esi
- movl 844(%esp),%edi
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edx
- addl 8(%esp),%eax
- adcl 12(%esp),%ebx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- addl 16(%esp),%ecx
- adcl 20(%esp),%edx
- movl %ecx,8(%esi)
- movl %edx,12(%esi)
- movl 16(%esi),%eax
- movl 20(%esi),%ebx
- movl 24(%esi),%ecx
- movl 28(%esi),%edx
- addl 24(%esp),%eax
- adcl 28(%esp),%ebx
- movl %eax,16(%esi)
- movl %ebx,20(%esi)
- addl 32(%esp),%ecx
- adcl 36(%esp),%edx
- movl %ecx,24(%esi)
- movl %edx,28(%esi)
- movl 32(%esi),%eax
- movl 36(%esi),%ebx
- movl 40(%esi),%ecx
- movl 44(%esi),%edx
- addl 40(%esp),%eax
- adcl 44(%esp),%ebx
- movl %eax,32(%esi)
- movl %ebx,36(%esi)
- addl 48(%esp),%ecx
- adcl 52(%esp),%edx
- movl %ecx,40(%esi)
- movl %edx,44(%esi)
- movl 48(%esi),%eax
- movl 52(%esi),%ebx
- movl 56(%esi),%ecx
- movl 60(%esi),%edx
- addl 56(%esp),%eax
- adcl 60(%esp),%ebx
- movl %eax,48(%esi)
- movl %ebx,52(%esi)
- addl 64(%esp),%ecx
- adcl 68(%esp),%edx
- movl %ecx,56(%esi)
- movl %edx,60(%esi)
- addl $840,%esp
- subl $640,%ebp
- cmpl 8(%esp),%edi
- jb .L002loop_x86
- movl 12(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 64
-.L001K512:
-.long 3609767458,1116352408
-.long 602891725,1899447441
-.long 3964484399,3049323471
-.long 2173295548,3921009573
-.long 4081628472,961987163
-.long 3053834265,1508970993
-.long 2937671579,2453635748
-.long 3664609560,2870763221
-.long 2734883394,3624381080
-.long 1164996542,310598401
-.long 1323610764,607225278
-.long 3590304994,1426881987
-.long 4068182383,1925078388
-.long 991336113,2162078206
-.long 633803317,2614888103
-.long 3479774868,3248222580
-.long 2666613458,3835390401
-.long 944711139,4022224774
-.long 2341262773,264347078
-.long 2007800933,604807628
-.long 1495990901,770255983
-.long 1856431235,1249150122
-.long 3175218132,1555081692
-.long 2198950837,1996064986
-.long 3999719339,2554220882
-.long 766784016,2821834349
-.long 2566594879,2952996808
-.long 3203337956,3210313671
-.long 1034457026,3336571891
-.long 2466948901,3584528711
-.long 3758326383,113926993
-.long 168717936,338241895
-.long 1188179964,666307205
-.long 1546045734,773529912
-.long 1522805485,1294757372
-.long 2643833823,1396182291
-.long 2343527390,1695183700
-.long 1014477480,1986661051
-.long 1206759142,2177026350
-.long 344077627,2456956037
-.long 1290863460,2730485921
-.long 3158454273,2820302411
-.long 3505952657,3259730800
-.long 106217008,3345764771
-.long 3606008344,3516065817
-.long 1432725776,3600352804
-.long 1467031594,4094571909
-.long 851169720,275423344
-.long 3100823752,430227734
-.long 1363258195,506948616
-.long 3750685593,659060556
-.long 3785050280,883997877
-.long 3318307427,958139571
-.long 3812723403,1322822218
-.long 2003034995,1537002063
-.long 3602036899,1747873779
-.long 1575990012,1955562222
-.long 1125592928,2024104815
-.long 2716904306,2227730452
-.long 442776044,2361852424
-.long 593698344,2428436474
-.long 3733110249,2756734187
-.long 2999351573,3204031479
-.long 3815920427,3329325298
-.long 3928383900,3391569614
-.long 566280711,3515267271
-.long 3454069534,3940187606
-.long 4000239992,4118630271
-.long 1914138554,116418474
-.long 2731055270,174292421
-.long 3203993006,289380356
-.long 320620315,460393269
-.long 587496836,685471733
-.long 1086792851,852142971
-.long 365543100,1017036298
-.long 2618297676,1126000580
-.long 3409855158,1288033470
-.long 4234509866,1501505948
-.long 987167468,1607167915
-.long 1246189591,1816402316
-.size sha512_block_data_order,.-.L_sha512_block_data_order_begin
-.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
-.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
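In the 32-bit code above every 64-bit quantity lives in a lo/hi pair of
registers or stack slots: 64-bit additions become the addl/adcl chains
visible throughout .L00600_15_x86, while the SSE2 path (taken when bit 26 of
OPENSSL_ia32cap_P, the SSE2 capability bit, is set; cf. the btl $26 test in
the prologue) keeps whole 64-bit words in MMX registers instead. A small C
sketch of the add-with-carry pairing; the u64pair type is an illustrative
stand-in, not something from the deleted file:

#include <stdint.h>

typedef struct { uint32_t lo, hi; } u64pair;	/* illustrative pair type */

/* 64-bit add as an addl/adcl pair: the carry out of the low-word add
 * propagates into the high word, exactly like "addl ...; adcl ...". */
static u64pair add64(u64pair a, u64pair b)
{
	u64pair r;
	r.lo = a.lo + b.lo;
	r.hi = a.hi + b.hi + (r.lo < a.lo);	/* emulate the carry flag */
	return r;
}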
diff --git a/app/openssl/crypto/sha/asm/sha512-586.pl b/app/openssl/crypto/sha/asm/sha512-586.pl
deleted file mode 100644
index 9f8c51eb..00000000
--- a/app/openssl/crypto/sha/asm/sha512-586.pl
+++ /dev/null
@@ -1,644 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# SHA512 block transform for x86. September 2007.
-#
-# Performance in clock cycles per processed byte (less is better):
-#
-# Pentium PIII P4 AMD K8 Core2
-# gcc 100 75 116 54 66
-# icc 97 77 95 55 57
-# x86 asm 61 56 82 36 40
-# SSE2 asm - - 38 24 20
-# x86_64 asm(*) - - 30 10.0 10.5
-#
-# (*) x86_64 assembler performance is presented for reference
-# purposes.
-#
-# The IALU code-path is optimized for older Pentiums. On vanilla Pentium
-# the performance improvement over compiler-generated code reaches ~60%,
-# while on PIII it is ~35%. On newer µ-archs the improvement varies from
-# 15% to 50%, but it matters less there, as those are expected to execute
-# the SSE2 code-path, which is commonly ~2-3x faster [than compiler-
-# generated code]. The SSE2 code-path is as fast as the original
-# sha512-sse2.pl, even though it does not use 128-bit operations; the
-# latter means an SSE2-aware kernel is no longer required to execute the
-# code. Another difference is that the new code reduces the number of
-# writes, at the cost of a data cache "footprint" larger by 1/2KB.
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
-
-$sse2=0;
-for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
-
-&external_label("OPENSSL_ia32cap_P") if ($sse2);
-
-$Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp");
-$Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp");
-$Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp");
-$Clo=&DWP(24,"esp"); $Chi=&DWP(24+4,"esp");
-$Dlo=&DWP(32,"esp"); $Dhi=&DWP(32+4,"esp");
-$Elo=&DWP(40,"esp"); $Ehi=&DWP(40+4,"esp");
-$Flo=&DWP(48,"esp"); $Fhi=&DWP(48+4,"esp");
-$Glo=&DWP(56,"esp"); $Ghi=&DWP(56+4,"esp");
-$Hlo=&DWP(64,"esp"); $Hhi=&DWP(64+4,"esp");
-$K512="ebp";
-
-$Asse2=&QWP(0,"esp");
-$Bsse2=&QWP(8,"esp");
-$Csse2=&QWP(16,"esp");
-$Dsse2=&QWP(24,"esp");
-$Esse2=&QWP(32,"esp");
-$Fsse2=&QWP(40,"esp");
-$Gsse2=&QWP(48,"esp");
-$Hsse2=&QWP(56,"esp");
-
-$A="mm0";	# B-D and
-$E="mm4";	# F-H are commonly loaded into mm1-mm3 and
-		# mm5-mm7 respectively, but on an on-demand basis...
-
-sub BODY_00_15_sse2 {
- my $prefetch=shift;
-
- &movq ("mm5",$Fsse2); # load f
- &movq ("mm6",$Gsse2); # load g
- &movq ("mm7",$Hsse2); # load h
-
- &movq ("mm1",$E); # %mm1 is sliding right
- &movq ("mm2",$E); # %mm2 is sliding left
- &psrlq ("mm1",14);
- &movq ($Esse2,$E); # modulo-scheduled save e
- &psllq ("mm2",23);
- &movq ("mm3","mm1"); # %mm3 is T1
- &psrlq ("mm1",4);
- &pxor ("mm3","mm2");
- &psllq ("mm2",23);
- &pxor ("mm3","mm1");
- &psrlq ("mm1",23);
- &pxor ("mm3","mm2");
- &psllq ("mm2",4);
- &pxor ("mm3","mm1");
- &paddq ("mm7",QWP(0,$K512)); # h+=K512[i]
- &pxor ("mm3","mm2"); # T1=Sigma1_512(e)
-
- &pxor ("mm5","mm6"); # f^=g
- &movq ("mm1",$Bsse2); # load b
- &pand ("mm5",$E); # f&=e
- &movq ("mm2",$Csse2); # load c
- &pxor ("mm5","mm6"); # f^=g
- &movq ($E,$Dsse2); # e = load d
- &paddq ("mm3","mm5"); # T1+=Ch(e,f,g)
- &movq (&QWP(0,"esp"),$A); # modulo-scheduled save a
- &paddq ("mm3","mm7"); # T1+=h
-
- &movq ("mm5",$A); # %mm5 is sliding right
- &movq ("mm6",$A); # %mm6 is sliding left
- &paddq ("mm3",&QWP(8*9,"esp")); # T1+=X[0]
- &psrlq ("mm5",28);
- &paddq ($E,"mm3"); # e += T1
- &psllq ("mm6",25);
- &movq ("mm7","mm5"); # %mm7 is T2
- &psrlq ("mm5",6);
- &pxor ("mm7","mm6");
- &psllq ("mm6",5);
- &pxor ("mm7","mm5");
- &psrlq ("mm5",5);
- &pxor ("mm7","mm6");
- &psllq ("mm6",6);
- &pxor ("mm7","mm5");
- &sub ("esp",8);
- &pxor ("mm7","mm6"); # T2=Sigma0_512(a)
-
- &movq ("mm5",$A); # %mm5=a
- &por ($A,"mm2"); # a=a|c
- &movq ("mm6",&QWP(8*(9+16-14),"esp")) if ($prefetch);
- &pand ("mm5","mm2"); # %mm5=a&c
- &pand ($A,"mm1"); # a=(a|c)&b
- &movq ("mm2",&QWP(8*(9+16-1),"esp")) if ($prefetch);
- &por ("mm5",$A); # %mm5=(a&c)|((a|c)&b)
- &paddq ("mm7","mm5"); # T2+=Maj(a,b,c)
- &movq ($A,"mm3"); # a=T1
-
- &mov (&LB("edx"),&BP(0,$K512));
- &paddq ($A,"mm7"); # a+=T2
- &add ($K512,8);
-}
-
-sub BODY_00_15_x86 {
- #define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- # LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- # HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- &mov ("ecx",$Elo);
- &mov ("edx",$Ehi);
- &mov ("esi","ecx");
-
- &shr ("ecx",9); # lo>>9
- &mov ("edi","edx");
- &shr ("edx",9); # hi>>9
- &mov ("ebx","ecx");
- &shl ("esi",14); # lo<<14
- &mov ("eax","edx");
- &shl ("edi",14); # hi<<14
- &xor ("ebx","esi");
-
- &shr ("ecx",14-9); # lo>>14
- &xor ("eax","edi");
- &shr ("edx",14-9); # hi>>14
- &xor ("eax","ecx");
- &shl ("esi",18-14); # lo<<18
- &xor ("ebx","edx");
- &shl ("edi",18-14); # hi<<18
- &xor ("ebx","esi");
-
- &shr ("ecx",18-14); # lo>>18
- &xor ("eax","edi");
- &shr ("edx",18-14); # hi>>18
- &xor ("eax","ecx");
- &shl ("esi",23-18); # lo<<23
- &xor ("ebx","edx");
- &shl ("edi",23-18); # hi<<23
- &xor ("eax","esi");
- &xor ("ebx","edi"); # T1 = Sigma1(e)
-
- &mov ("ecx",$Flo);
- &mov ("edx",$Fhi);
- &mov ("esi",$Glo);
- &mov ("edi",$Ghi);
- &add ("eax",$Hlo);
- &adc ("ebx",$Hhi); # T1 += h
- &xor ("ecx","esi");
- &xor ("edx","edi");
- &and ("ecx",$Elo);
- &and ("edx",$Ehi);
- &add ("eax",&DWP(8*(9+15)+0,"esp"));
- &adc ("ebx",&DWP(8*(9+15)+4,"esp")); # T1 += X[0]
- &xor ("ecx","esi");
-	&xor	("edx","edi");			# Ch(e,f,g) = ((f^g)&e)^g
-
- &mov ("esi",&DWP(0,$K512));
- &mov ("edi",&DWP(4,$K512)); # K[i]
- &add ("eax","ecx");
- &adc ("ebx","edx"); # T1 += Ch(e,f,g)
- &mov ("ecx",$Dlo);
- &mov ("edx",$Dhi);
- &add ("eax","esi");
- &adc ("ebx","edi"); # T1 += K[i]
- &mov ($Tlo,"eax");
- &mov ($Thi,"ebx"); # put T1 away
- &add ("eax","ecx");
- &adc ("ebx","edx"); # d += T1
-
- #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- # LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- # HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- &mov ("ecx",$Alo);
- &mov ("edx",$Ahi);
- &mov ($Dlo,"eax");
- &mov ($Dhi,"ebx");
- &mov ("esi","ecx");
-
- &shr ("ecx",2); # lo>>2
- &mov ("edi","edx");
- &shr ("edx",2); # hi>>2
- &mov ("ebx","ecx");
- &shl ("esi",4); # lo<<4
- &mov ("eax","edx");
- &shl ("edi",4); # hi<<4
- &xor ("ebx","esi");
-
- &shr ("ecx",7-2); # lo>>7
- &xor ("eax","edi");
- &shr ("edx",7-2); # hi>>7
- &xor ("ebx","ecx");
- &shl ("esi",25-4); # lo<<25
- &xor ("eax","edx");
- &shl ("edi",25-4); # hi<<25
- &xor ("eax","esi");
-
- &shr ("ecx",28-7); # lo>>28
- &xor ("ebx","edi");
- &shr ("edx",28-7); # hi>>28
- &xor ("eax","ecx");
- &shl ("esi",30-25); # lo<<30
- &xor ("ebx","edx");
- &shl ("edi",30-25); # hi<<30
- &xor ("eax","esi");
- &xor ("ebx","edi"); # Sigma0(a)
-
- &mov ("ecx",$Alo);
- &mov ("edx",$Ahi);
- &mov ("esi",$Blo);
- &mov ("edi",$Bhi);
- &add ("eax",$Tlo);
- &adc ("ebx",$Thi); # T1 = Sigma0(a)+T1
- &or ("ecx","esi");
- &or ("edx","edi");
- &and ("ecx",$Clo);
- &and ("edx",$Chi);
- &and ("esi",$Alo);
- &and ("edi",$Ahi);
- &or ("ecx","esi");
- &or ("edx","edi"); # Maj(a,b,c) = ((a|b)&c)|(a&b)
-
- &add ("eax","ecx");
- &adc ("ebx","edx"); # T1 += Maj(a,b,c)
- &mov ($Tlo,"eax");
- &mov ($Thi,"ebx");
-
- &mov (&LB("edx"),&BP(0,$K512)); # pre-fetch LSB of *K
- &sub ("esp",8);
- &lea ($K512,&DWP(8,$K512)); # K++
-}
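The LO/HI comment lines inside BODY_00_15_x86 above spell out how each 64-bit
ROTR is rebuilt from 32-bit shifts of the two halves: for r < 32 the low
result word combines lo>>r with hi<<(32-r), and symmetrically for the high
word; for r >= 32 the halves are swapped first and the rotate count drops by
32, which is why ROTR((x),41) contributes "hi>>9^lo<<23". A minimal C sketch
of that decomposition, reusing the illustrative u64pair from the sketch after
sha512-586.S:

#include <stdint.h>

typedef struct { uint32_t lo, hi; } u64pair;	/* illustrative pair type */

/* ROTR on a lo/hi pair for 0 < r < 32; r = 14 reproduces the
 * "lo>>14^hi<<18" / "hi>>14^lo<<18" terms quoted in the comments. */
static u64pair rotr64(u64pair x, unsigned r)
{
	u64pair y;
	y.lo = (x.lo >> r) | (x.hi << (32 - r));
	y.hi = (x.hi >> r) | (x.lo << (32 - r));
	return y;
}
/* For r >= 32, e.g. ROTR((x),41): swap lo and hi, then rotate by r - 32. */

The comments combine the shifted terms with ^ where this sketch uses |; the
two are interchangeable here because the shifted bit-fields never overlap.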
-
-
-&function_begin("sha512_block_data_order");
- &mov ("esi",wparam(0)); # ctx
- &mov ("edi",wparam(1)); # inp
- &mov ("eax",wparam(2)); # num
- &mov ("ebx","esp"); # saved sp
-
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K512);
- &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
-
- &sub ("esp",16);
- &and ("esp",-64);
-
- &shl ("eax",7);
- &add ("eax","edi");
- &mov (&DWP(0,"esp"),"esi"); # ctx
- &mov (&DWP(4,"esp"),"edi"); # inp
- &mov (&DWP(8,"esp"),"eax"); # inp+num*128
- &mov (&DWP(12,"esp"),"ebx"); # saved sp
-
-if ($sse2) {
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
- &bt (&DWP(0,"edx"),26);
- &jnc (&label("loop_x86"));
-
- # load ctx->h[0-7]
- &movq ($A,&QWP(0,"esi"));
- &movq ("mm1",&QWP(8,"esi"));
- &movq ("mm2",&QWP(16,"esi"));
- &movq ("mm3",&QWP(24,"esi"));
- &movq ($E,&QWP(32,"esi"));
- &movq ("mm5",&QWP(40,"esi"));
- &movq ("mm6",&QWP(48,"esi"));
- &movq ("mm7",&QWP(56,"esi"));
- &sub ("esp",8*10);
-
-&set_label("loop_sse2",16);
- # &movq ($Asse2,$A);
- &movq ($Bsse2,"mm1");
- &movq ($Csse2,"mm2");
- &movq ($Dsse2,"mm3");
- # &movq ($Esse2,$E);
- &movq ($Fsse2,"mm5");
- &movq ($Gsse2,"mm6");
- &movq ($Hsse2,"mm7");
-
- &mov ("ecx",&DWP(0,"edi"));
- &mov ("edx",&DWP(4,"edi"));
- &add ("edi",8);
- &bswap ("ecx");
- &bswap ("edx");
- &mov (&DWP(8*9+4,"esp"),"ecx");
- &mov (&DWP(8*9+0,"esp"),"edx");
-
-&set_label("00_14_sse2",16);
- &mov ("eax",&DWP(0,"edi"));
- &mov ("ebx",&DWP(4,"edi"));
- &add ("edi",8);
- &bswap ("eax");
- &bswap ("ebx");
- &mov (&DWP(8*8+4,"esp"),"eax");
- &mov (&DWP(8*8+0,"esp"),"ebx");
-
- &BODY_00_15_sse2();
-
- &cmp (&LB("edx"),0x35);
- &jne (&label("00_14_sse2"));
-
- &BODY_00_15_sse2(1);
-
-&set_label("16_79_sse2",16);
- #&movq ("mm2",&QWP(8*(9+16-1),"esp")); #prefetched in BODY_00_15
- #&movq ("mm6",&QWP(8*(9+16-14),"esp"));
- &movq ("mm1","mm2");
-
- &psrlq ("mm2",1);
- &movq ("mm7","mm6");
- &psrlq ("mm6",6);
- &movq ("mm3","mm2");
-
- &psrlq ("mm2",7-1);
- &movq ("mm5","mm6");
- &psrlq ("mm6",19-6);
- &pxor ("mm3","mm2");
-
- &psrlq ("mm2",8-7);
- &pxor ("mm5","mm6");
- &psrlq ("mm6",61-19);
- &pxor ("mm3","mm2");
-
- &movq ("mm2",&QWP(8*(9+16),"esp"));
-
- &psllq ("mm1",56);
- &pxor ("mm5","mm6");
- &psllq ("mm7",3);
- &pxor ("mm3","mm1");
-
- &paddq ("mm2",&QWP(8*(9+16-9),"esp"));
-
- &psllq ("mm1",63-56);
- &pxor ("mm5","mm7");
- &psllq ("mm7",45-3);
- &pxor ("mm3","mm1");
- &pxor ("mm5","mm7");
-
- &paddq ("mm3","mm5");
- &paddq ("mm3","mm2");
- &movq (&QWP(8*9,"esp"),"mm3");
-
- &BODY_00_15_sse2(1);
-
- &cmp (&LB("edx"),0x17);
- &jne (&label("16_79_sse2"));
-
- # &movq ($A,$Asse2);
- &movq ("mm1",$Bsse2);
- &movq ("mm2",$Csse2);
- &movq ("mm3",$Dsse2);
- # &movq ($E,$Esse2);
- &movq ("mm5",$Fsse2);
- &movq ("mm6",$Gsse2);
- &movq ("mm7",$Hsse2);
-
- &paddq ($A,&QWP(0,"esi"));
- &paddq ("mm1",&QWP(8,"esi"));
- &paddq ("mm2",&QWP(16,"esi"));
- &paddq ("mm3",&QWP(24,"esi"));
- &paddq ($E,&QWP(32,"esi"));
- &paddq ("mm5",&QWP(40,"esi"));
- &paddq ("mm6",&QWP(48,"esi"));
- &paddq ("mm7",&QWP(56,"esi"));
-
- &movq (&QWP(0,"esi"),$A);
- &movq (&QWP(8,"esi"),"mm1");
- &movq (&QWP(16,"esi"),"mm2");
- &movq (&QWP(24,"esi"),"mm3");
- &movq (&QWP(32,"esi"),$E);
- &movq (&QWP(40,"esi"),"mm5");
- &movq (&QWP(48,"esi"),"mm6");
- &movq (&QWP(56,"esi"),"mm7");
-
- &add ("esp",8*80); # destroy frame
- &sub ($K512,8*80); # rewind K
-
- &cmp ("edi",&DWP(8*10+8,"esp")); # are we done yet?
- &jb (&label("loop_sse2"));
-
- &emms ();
- &mov ("esp",&DWP(8*10+12,"esp")); # restore sp
-&function_end_A();
-}
-&set_label("loop_x86",16);
- # copy input block to stack reversing byte and qword order
- for ($i=0;$i<8;$i++) {
- &mov ("eax",&DWP($i*16+0,"edi"));
- &mov ("ebx",&DWP($i*16+4,"edi"));
- &mov ("ecx",&DWP($i*16+8,"edi"));
- &mov ("edx",&DWP($i*16+12,"edi"));
- &bswap ("eax");
- &bswap ("ebx");
- &bswap ("ecx");
- &bswap ("edx");
- &push ("eax");
- &push ("ebx");
- &push ("ecx");
- &push ("edx");
- }
- &add ("edi",128);
- &sub ("esp",9*8); # place for T,A,B,C,D,E,F,G,H
- &mov (&DWP(8*(9+16)+4,"esp"),"edi");
-
- # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
- &lea ("edi",&DWP(8,"esp"));
- &mov ("ecx",16);
- &data_word(0xA5F3F689); # rep movsd
-
-&set_label("00_15_x86",16);
- &BODY_00_15_x86();
-
- &cmp (&LB("edx"),0x94);
- &jne (&label("00_15_x86"));
-
-&set_label("16_79_x86",16);
- #define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
- # LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
- # HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
- &mov ("ecx",&DWP(8*(9+15+16-1)+0,"esp"));
- &mov ("edx",&DWP(8*(9+15+16-1)+4,"esp"));
- &mov ("esi","ecx");
-
- &shr ("ecx",1); # lo>>1
- &mov ("edi","edx");
- &shr ("edx",1); # hi>>1
- &mov ("eax","ecx");
- &shl ("esi",24); # lo<<24
- &mov ("ebx","edx");
- &shl ("edi",24); # hi<<24
- &xor ("ebx","esi");
-
- &shr ("ecx",7-1); # lo>>7
- &xor ("eax","edi");
- &shr ("edx",7-1); # hi>>7
- &xor ("eax","ecx");
- &shl ("esi",31-24); # lo<<31
- &xor ("ebx","edx");
- &shl ("edi",25-24); # hi<<25
- &xor ("ebx","esi");
-
- &shr ("ecx",8-7); # lo>>8
- &xor ("eax","edi");
- &shr ("edx",8-7); # hi>>8
- &xor ("eax","ecx");
- &shl ("edi",31-25); # hi<<31
- &xor ("ebx","edx");
- &xor ("eax","edi"); # T1 = sigma0(X[-15])
-
- &mov (&DWP(0,"esp"),"eax");
- &mov (&DWP(4,"esp"),"ebx"); # put T1 away
-
- #define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
- # LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
- # HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
- &mov ("ecx",&DWP(8*(9+15+16-14)+0,"esp"));
- &mov ("edx",&DWP(8*(9+15+16-14)+4,"esp"));
- &mov ("esi","ecx");
-
- &shr ("ecx",6); # lo>>6
- &mov ("edi","edx");
- &shr ("edx",6); # hi>>6
- &mov ("eax","ecx");
- &shl ("esi",3); # lo<<3
- &mov ("ebx","edx");
- &shl ("edi",3); # hi<<3
- &xor ("eax","esi");
-
- &shr ("ecx",19-6); # lo>>19
- &xor ("ebx","edi");
- &shr ("edx",19-6); # hi>>19
- &xor ("eax","ecx");
- &shl ("esi",13-3); # lo<<13
- &xor ("ebx","edx");
- &shl ("edi",13-3); # hi<<13
- &xor ("ebx","esi");
-
- &shr ("ecx",29-19); # lo>>29
- &xor ("eax","edi");
- &shr ("edx",29-19); # hi>>29
- &xor ("ebx","ecx");
- &shl ("edi",26-13); # hi<<26
- &xor ("eax","edx");
- &xor ("eax","edi"); # sigma1(X[-2])
-
- &mov ("ecx",&DWP(8*(9+15+16)+0,"esp"));
- &mov ("edx",&DWP(8*(9+15+16)+4,"esp"));
- &add ("eax",&DWP(0,"esp"));
- &adc ("ebx",&DWP(4,"esp")); # T1 = sigma1(X[-2])+T1
- &mov ("esi",&DWP(8*(9+15+16-9)+0,"esp"));
- &mov ("edi",&DWP(8*(9+15+16-9)+4,"esp"));
- &add ("eax","ecx");
- &adc ("ebx","edx"); # T1 += X[-16]
- &add ("eax","esi");
- &adc ("ebx","edi"); # T1 += X[-7]
- &mov (&DWP(8*(9+15)+0,"esp"),"eax");
- &mov (&DWP(8*(9+15)+4,"esp"),"ebx"); # save X[0]
-
- &BODY_00_15_x86();
-
- &cmp (&LB("edx"),0x17);
- &jne (&label("16_79_x86"));
-
- &mov ("esi",&DWP(8*(9+16+80)+0,"esp"));# ctx
- &mov ("edi",&DWP(8*(9+16+80)+4,"esp"));# inp
- for($i=0;$i<4;$i++) {
- &mov ("eax",&DWP($i*16+0,"esi"));
- &mov ("ebx",&DWP($i*16+4,"esi"));
- &mov ("ecx",&DWP($i*16+8,"esi"));
- &mov ("edx",&DWP($i*16+12,"esi"));
- &add ("eax",&DWP(8+($i*16)+0,"esp"));
- &adc ("ebx",&DWP(8+($i*16)+4,"esp"));
- &mov (&DWP($i*16+0,"esi"),"eax");
- &mov (&DWP($i*16+4,"esi"),"ebx");
- &add ("ecx",&DWP(8+($i*16)+8,"esp"));
- &adc ("edx",&DWP(8+($i*16)+12,"esp"));
- &mov (&DWP($i*16+8,"esi"),"ecx");
- &mov (&DWP($i*16+12,"esi"),"edx");
- }
- &add ("esp",8*(9+16+80)); # destroy frame
- &sub ($K512,8*80); # rewind K
-
- &cmp ("edi",&DWP(8,"esp")); # are we done yet?
- &jb (&label("loop_x86"));
-
- &mov ("esp",&DWP(12,"esp")); # restore sp
-&function_end_A();
-
-&set_label("K512",64); # Yes! I keep it in the code segment!
- &data_word(0xd728ae22,0x428a2f98); # u64
- &data_word(0x23ef65cd,0x71374491); # u64
- &data_word(0xec4d3b2f,0xb5c0fbcf); # u64
- &data_word(0x8189dbbc,0xe9b5dba5); # u64
- &data_word(0xf348b538,0x3956c25b); # u64
- &data_word(0xb605d019,0x59f111f1); # u64
- &data_word(0xaf194f9b,0x923f82a4); # u64
- &data_word(0xda6d8118,0xab1c5ed5); # u64
- &data_word(0xa3030242,0xd807aa98); # u64
- &data_word(0x45706fbe,0x12835b01); # u64
- &data_word(0x4ee4b28c,0x243185be); # u64
- &data_word(0xd5ffb4e2,0x550c7dc3); # u64
- &data_word(0xf27b896f,0x72be5d74); # u64
- &data_word(0x3b1696b1,0x80deb1fe); # u64
- &data_word(0x25c71235,0x9bdc06a7); # u64
- &data_word(0xcf692694,0xc19bf174); # u64
- &data_word(0x9ef14ad2,0xe49b69c1); # u64
- &data_word(0x384f25e3,0xefbe4786); # u64
- &data_word(0x8b8cd5b5,0x0fc19dc6); # u64
- &data_word(0x77ac9c65,0x240ca1cc); # u64
- &data_word(0x592b0275,0x2de92c6f); # u64
- &data_word(0x6ea6e483,0x4a7484aa); # u64
- &data_word(0xbd41fbd4,0x5cb0a9dc); # u64
- &data_word(0x831153b5,0x76f988da); # u64
- &data_word(0xee66dfab,0x983e5152); # u64
- &data_word(0x2db43210,0xa831c66d); # u64
- &data_word(0x98fb213f,0xb00327c8); # u64
- &data_word(0xbeef0ee4,0xbf597fc7); # u64
- &data_word(0x3da88fc2,0xc6e00bf3); # u64
- &data_word(0x930aa725,0xd5a79147); # u64
- &data_word(0xe003826f,0x06ca6351); # u64
- &data_word(0x0a0e6e70,0x14292967); # u64
- &data_word(0x46d22ffc,0x27b70a85); # u64
- &data_word(0x5c26c926,0x2e1b2138); # u64
- &data_word(0x5ac42aed,0x4d2c6dfc); # u64
- &data_word(0x9d95b3df,0x53380d13); # u64
- &data_word(0x8baf63de,0x650a7354); # u64
- &data_word(0x3c77b2a8,0x766a0abb); # u64
- &data_word(0x47edaee6,0x81c2c92e); # u64
- &data_word(0x1482353b,0x92722c85); # u64
- &data_word(0x4cf10364,0xa2bfe8a1); # u64
- &data_word(0xbc423001,0xa81a664b); # u64
- &data_word(0xd0f89791,0xc24b8b70); # u64
- &data_word(0x0654be30,0xc76c51a3); # u64
- &data_word(0xd6ef5218,0xd192e819); # u64
- &data_word(0x5565a910,0xd6990624); # u64
- &data_word(0x5771202a,0xf40e3585); # u64
- &data_word(0x32bbd1b8,0x106aa070); # u64
- &data_word(0xb8d2d0c8,0x19a4c116); # u64
- &data_word(0x5141ab53,0x1e376c08); # u64
- &data_word(0xdf8eeb99,0x2748774c); # u64
- &data_word(0xe19b48a8,0x34b0bcb5); # u64
- &data_word(0xc5c95a63,0x391c0cb3); # u64
- &data_word(0xe3418acb,0x4ed8aa4a); # u64
- &data_word(0x7763e373,0x5b9cca4f); # u64
- &data_word(0xd6b2b8a3,0x682e6ff3); # u64
- &data_word(0x5defb2fc,0x748f82ee); # u64
- &data_word(0x43172f60,0x78a5636f); # u64
- &data_word(0xa1f0ab72,0x84c87814); # u64
- &data_word(0x1a6439ec,0x8cc70208); # u64
- &data_word(0x23631e28,0x90befffa); # u64
- &data_word(0xde82bde9,0xa4506ceb); # u64
- &data_word(0xb2c67915,0xbef9a3f7); # u64
- &data_word(0xe372532b,0xc67178f2); # u64
- &data_word(0xea26619c,0xca273ece); # u64
- &data_word(0x21c0c207,0xd186b8c7); # u64
- &data_word(0xcde0eb1e,0xeada7dd6); # u64
- &data_word(0xee6ed178,0xf57d4f7f); # u64
- &data_word(0x72176fba,0x06f067aa); # u64
- &data_word(0xa2c898a6,0x0a637dc5); # u64
- &data_word(0xbef90dae,0x113f9804); # u64
- &data_word(0x131c471b,0x1b710b35); # u64
- &data_word(0x23047d84,0x28db77f5); # u64
- &data_word(0x40c72493,0x32caab7b); # u64
- &data_word(0x15c9bebc,0x3c9ebe0a); # u64
- &data_word(0x9c100d4c,0x431d67c4); # u64
- &data_word(0xcb3e42b6,0x4cc5d4be); # u64
- &data_word(0xfc657e2a,0x597f299c); # u64
- &data_word(0x3ad6faec,0x5fcb6fab); # u64
- &data_word(0x4a475817,0x6c44198c); # u64
-&function_end_B("sha512_block_data_order");
-&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
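The sigma0/sigma1 comments above spell out how each 64-bit rotate is split
into paired shifts on the low and high 32-bit words. As a reading aid, a
minimal C sketch of that decomposition for sigma0 (the sigma0_lo/sigma0_hi
names are illustrative, not part of the deleted module):

    #include <stdint.h>

    /* sigma0(x) = ROTR(x,1) ^ ROTR(x,8) ^ (x >> 7), with x = (hi << 32) | lo */
    static uint32_t sigma0_lo(uint32_t lo, uint32_t hi)
    {
        /* matches "LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25" above */
        return ((lo >> 1) ^ (hi << 31))
             ^ ((lo >> 8) ^ (hi << 24))
             ^ ((lo >> 7) ^ (hi << 25));
    }

    static uint32_t sigma0_hi(uint32_t lo, uint32_t hi)
    {
        /* matches "HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7" above */
        return ((hi >> 1) ^ (lo << 31))
             ^ ((hi >> 8) ^ (lo << 24))
             ^ (hi >> 7);
    }

sigma1 decomposes the same way with the (19,61,6) shift amounts, as the
"LO lo>>19^hi<<13 ..." comments in the 16_79_x86 loop indicate.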
diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.S b/app/openssl/crypto/sha/asm/sha512-armv4.S
deleted file mode 100644
index fd462771..00000000
--- a/app/openssl/crypto/sha/asm/sha512-armv4.S
+++ /dev/null
@@ -1,1783 +0,0 @@
-#include "arm_arch.h"
-#ifdef __ARMEL__
-# define LO 0
-# define HI 4
-# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
-#else
-# define HI 0
-# define LO 4
-# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
-#endif
-
-.text
-.code 32
-.type K512,%object
-.align 5
-K512:
-WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
-WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
-WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
-WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
-WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
-WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
-WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
-WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
-WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
-WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
-WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
-WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
-WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
-WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
-WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
-WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
-WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
-WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
-WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
-WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
-WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
-WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
-WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
-WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
-WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
-WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
-WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
-WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
-WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
-WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
-WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
-WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
-WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
-WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
-WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
-WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
-WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
-WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
-WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
-WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
-.size K512,.-K512
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha512_block_data_order
-.skip 32-4
-
-.global sha512_block_data_order
-.type sha512_block_data_order,%function
-sha512_block_data_order:
- sub r3,pc,#8 @ sha512_block_data_order
- add r2,r1,r2,lsl#7 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#1
- bne .LNEON
-#endif
- stmdb sp!,{r4-r12,lr}
- sub r14,r3,#672 @ K512
- sub sp,sp,#9*8
-
- ldr r7,[r0,#32+LO]
- ldr r8,[r0,#32+HI]
- ldr r9, [r0,#48+LO]
- ldr r10, [r0,#48+HI]
- ldr r11, [r0,#56+LO]
- ldr r12, [r0,#56+HI]
-.Loop:
- str r9, [sp,#48+0]
- str r10, [sp,#48+4]
- str r11, [sp,#56+0]
- str r12, [sp,#56+4]
- ldr r5,[r0,#0+LO]
- ldr r6,[r0,#0+HI]
- ldr r3,[r0,#8+LO]
- ldr r4,[r0,#8+HI]
- ldr r9, [r0,#16+LO]
- ldr r10, [r0,#16+HI]
- ldr r11, [r0,#24+LO]
- ldr r12, [r0,#24+HI]
- str r3,[sp,#8+0]
- str r4,[sp,#8+4]
- str r9, [sp,#16+0]
- str r10, [sp,#16+4]
- str r11, [sp,#24+0]
- str r12, [sp,#24+4]
- ldr r3,[r0,#40+LO]
- ldr r4,[r0,#40+HI]
- str r3,[sp,#40+0]
- str r4,[sp,#40+4]
-
-.L00_15:
-#if __ARM_ARCH__<7
- ldrb r3,[r1,#7]
- ldrb r9, [r1,#6]
- ldrb r10, [r1,#5]
- ldrb r11, [r1,#4]
- ldrb r4,[r1,#3]
- ldrb r12, [r1,#2]
- orr r3,r3,r9,lsl#8
- ldrb r9, [r1,#1]
- orr r3,r3,r10,lsl#16
- ldrb r10, [r1],#8
- orr r3,r3,r11,lsl#24
- orr r4,r4,r12,lsl#8
- orr r4,r4,r9,lsl#16
- orr r4,r4,r10,lsl#24
-#else
- ldr r3,[r1,#4]
- ldr r4,[r1],#8
-#ifdef __ARMEL__
- rev r3,r3
- rev r4,r4
-#endif
-#endif
- @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- mov r9,r7,lsr#14
- str r3,[sp,#64+0]
- mov r10,r8,lsr#14
- str r4,[sp,#64+4]
- eor r9,r9,r8,lsl#18
- ldr r11,[sp,#56+0] @ h.lo
- eor r10,r10,r7,lsl#18
- ldr r12,[sp,#56+4] @ h.hi
- eor r9,r9,r7,lsr#18
- eor r10,r10,r8,lsr#18
- eor r9,r9,r8,lsl#14
- eor r10,r10,r7,lsl#14
- eor r9,r9,r8,lsr#9
- eor r10,r10,r7,lsr#9
- eor r9,r9,r7,lsl#23
- eor r10,r10,r8,lsl#23 @ Sigma1(e)
- adds r3,r3,r9
- ldr r9,[sp,#40+0] @ f.lo
- adc r4,r4,r10 @ T += Sigma1(e)
- ldr r10,[sp,#40+4] @ f.hi
- adds r3,r3,r11
- ldr r11,[sp,#48+0] @ g.lo
- adc r4,r4,r12 @ T += h
- ldr r12,[sp,#48+4] @ g.hi
-
- eor r9,r9,r11
- str r7,[sp,#32+0]
- eor r10,r10,r12
- str r8,[sp,#32+4]
- and r9,r9,r7
- str r5,[sp,#0+0]
- and r10,r10,r8
- str r6,[sp,#0+4]
- eor r9,r9,r11
- ldr r11,[r14,#LO] @ K[i].lo
- eor r10,r10,r12 @ Ch(e,f,g)
- ldr r12,[r14,#HI] @ K[i].hi
-
- adds r3,r3,r9
- ldr r7,[sp,#24+0] @ d.lo
- adc r4,r4,r10 @ T += Ch(e,f,g)
- ldr r8,[sp,#24+4] @ d.hi
- adds r3,r3,r11
- and r9,r11,#0xff
- adc r4,r4,r12 @ T += K[i]
- adds r7,r7,r3
- ldr r11,[sp,#8+0] @ b.lo
- adc r8,r8,r4 @ d += T
- teq r9,#148
-
- ldr r12,[sp,#16+0] @ c.lo
- orreq r14,r14,#1
- @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- mov r9,r5,lsr#28
- mov r10,r6,lsr#28
- eor r9,r9,r6,lsl#4
- eor r10,r10,r5,lsl#4
- eor r9,r9,r6,lsr#2
- eor r10,r10,r5,lsr#2
- eor r9,r9,r5,lsl#30
- eor r10,r10,r6,lsl#30
- eor r9,r9,r6,lsr#7
- eor r10,r10,r5,lsr#7
- eor r9,r9,r5,lsl#25
- eor r10,r10,r6,lsl#25 @ Sigma0(a)
- adds r3,r3,r9
- and r9,r5,r11
- adc r4,r4,r10 @ T += Sigma0(a)
-
- ldr r10,[sp,#8+4] @ b.hi
- orr r5,r5,r11
- ldr r11,[sp,#16+4] @ c.hi
- and r5,r5,r12
- and r12,r6,r10
- orr r6,r6,r10
- orr r5,r5,r9 @ Maj(a,b,c).lo
- and r6,r6,r11
- adds r5,r5,r3
- orr r6,r6,r12 @ Maj(a,b,c).hi
- sub sp,sp,#8
- adc r6,r6,r4 @ h += T
- tst r14,#1
- add r14,r14,#8
- tst r14,#1
- beq .L00_15
- ldr r9,[sp,#184+0]
- ldr r10,[sp,#184+4]
- bic r14,r14,#1
-.L16_79:
- @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
- @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
- @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
- mov r3,r9,lsr#1
- ldr r11,[sp,#80+0]
- mov r4,r10,lsr#1
- ldr r12,[sp,#80+4]
- eor r3,r3,r10,lsl#31
- eor r4,r4,r9,lsl#31
- eor r3,r3,r9,lsr#8
- eor r4,r4,r10,lsr#8
- eor r3,r3,r10,lsl#24
- eor r4,r4,r9,lsl#24
- eor r3,r3,r9,lsr#7
- eor r4,r4,r10,lsr#7
- eor r3,r3,r10,lsl#25
-
- @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
- @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
- @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
- mov r9,r11,lsr#19
- mov r10,r12,lsr#19
- eor r9,r9,r12,lsl#13
- eor r10,r10,r11,lsl#13
- eor r9,r9,r12,lsr#29
- eor r10,r10,r11,lsr#29
- eor r9,r9,r11,lsl#3
- eor r10,r10,r12,lsl#3
- eor r9,r9,r11,lsr#6
- eor r10,r10,r12,lsr#6
- ldr r11,[sp,#120+0]
- eor r9,r9,r12,lsl#26
-
- ldr r12,[sp,#120+4]
- adds r3,r3,r9
- ldr r9,[sp,#192+0]
- adc r4,r4,r10
-
- ldr r10,[sp,#192+4]
- adds r3,r3,r11
- adc r4,r4,r12
- adds r3,r3,r9
- adc r4,r4,r10
- @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- mov r9,r7,lsr#14
- str r3,[sp,#64+0]
- mov r10,r8,lsr#14
- str r4,[sp,#64+4]
- eor r9,r9,r8,lsl#18
- ldr r11,[sp,#56+0] @ h.lo
- eor r10,r10,r7,lsl#18
- ldr r12,[sp,#56+4] @ h.hi
- eor r9,r9,r7,lsr#18
- eor r10,r10,r8,lsr#18
- eor r9,r9,r8,lsl#14
- eor r10,r10,r7,lsl#14
- eor r9,r9,r8,lsr#9
- eor r10,r10,r7,lsr#9
- eor r9,r9,r7,lsl#23
- eor r10,r10,r8,lsl#23 @ Sigma1(e)
- adds r3,r3,r9
- ldr r9,[sp,#40+0] @ f.lo
- adc r4,r4,r10 @ T += Sigma1(e)
- ldr r10,[sp,#40+4] @ f.hi
- adds r3,r3,r11
- ldr r11,[sp,#48+0] @ g.lo
- adc r4,r4,r12 @ T += h
- ldr r12,[sp,#48+4] @ g.hi
-
- eor r9,r9,r11
- str r7,[sp,#32+0]
- eor r10,r10,r12
- str r8,[sp,#32+4]
- and r9,r9,r7
- str r5,[sp,#0+0]
- and r10,r10,r8
- str r6,[sp,#0+4]
- eor r9,r9,r11
- ldr r11,[r14,#LO] @ K[i].lo
- eor r10,r10,r12 @ Ch(e,f,g)
- ldr r12,[r14,#HI] @ K[i].hi
-
- adds r3,r3,r9
- ldr r7,[sp,#24+0] @ d.lo
- adc r4,r4,r10 @ T += Ch(e,f,g)
- ldr r8,[sp,#24+4] @ d.hi
- adds r3,r3,r11
- and r9,r11,#0xff
- adc r4,r4,r12 @ T += K[i]
- adds r7,r7,r3
- ldr r11,[sp,#8+0] @ b.lo
- adc r8,r8,r4 @ d += T
- teq r9,#23
-
- ldr r12,[sp,#16+0] @ c.lo
- orreq r14,r14,#1
- @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- mov r9,r5,lsr#28
- mov r10,r6,lsr#28
- eor r9,r9,r6,lsl#4
- eor r10,r10,r5,lsl#4
- eor r9,r9,r6,lsr#2
- eor r10,r10,r5,lsr#2
- eor r9,r9,r5,lsl#30
- eor r10,r10,r6,lsl#30
- eor r9,r9,r6,lsr#7
- eor r10,r10,r5,lsr#7
- eor r9,r9,r5,lsl#25
- eor r10,r10,r6,lsl#25 @ Sigma0(a)
- adds r3,r3,r9
- and r9,r5,r11
- adc r4,r4,r10 @ T += Sigma0(a)
-
- ldr r10,[sp,#8+4] @ b.hi
- orr r5,r5,r11
- ldr r11,[sp,#16+4] @ c.hi
- and r5,r5,r12
- and r12,r6,r10
- orr r6,r6,r10
- orr r5,r5,r9 @ Maj(a,b,c).lo
- and r6,r6,r11
- adds r5,r5,r3
- orr r6,r6,r12 @ Maj(a,b,c).hi
- sub sp,sp,#8
- adc r6,r6,r4 @ h += T
- tst r14,#1
- add r14,r14,#8
- ldreq r9,[sp,#184+0]
- ldreq r10,[sp,#184+4]
- beq .L16_79
- bic r14,r14,#1
-
- ldr r3,[sp,#8+0]
- ldr r4,[sp,#8+4]
- ldr r9, [r0,#0+LO]
- ldr r10, [r0,#0+HI]
- ldr r11, [r0,#8+LO]
- ldr r12, [r0,#8+HI]
- adds r9,r5,r9
- str r9, [r0,#0+LO]
- adc r10,r6,r10
- str r10, [r0,#0+HI]
- adds r11,r3,r11
- str r11, [r0,#8+LO]
- adc r12,r4,r12
- str r12, [r0,#8+HI]
-
- ldr r5,[sp,#16+0]
- ldr r6,[sp,#16+4]
- ldr r3,[sp,#24+0]
- ldr r4,[sp,#24+4]
- ldr r9, [r0,#16+LO]
- ldr r10, [r0,#16+HI]
- ldr r11, [r0,#24+LO]
- ldr r12, [r0,#24+HI]
- adds r9,r5,r9
- str r9, [r0,#16+LO]
- adc r10,r6,r10
- str r10, [r0,#16+HI]
- adds r11,r3,r11
- str r11, [r0,#24+LO]
- adc r12,r4,r12
- str r12, [r0,#24+HI]
-
- ldr r3,[sp,#40+0]
- ldr r4,[sp,#40+4]
- ldr r9, [r0,#32+LO]
- ldr r10, [r0,#32+HI]
- ldr r11, [r0,#40+LO]
- ldr r12, [r0,#40+HI]
- adds r7,r7,r9
- str r7,[r0,#32+LO]
- adc r8,r8,r10
- str r8,[r0,#32+HI]
- adds r11,r3,r11
- str r11, [r0,#40+LO]
- adc r12,r4,r12
- str r12, [r0,#40+HI]
-
- ldr r5,[sp,#48+0]
- ldr r6,[sp,#48+4]
- ldr r3,[sp,#56+0]
- ldr r4,[sp,#56+4]
- ldr r9, [r0,#48+LO]
- ldr r10, [r0,#48+HI]
- ldr r11, [r0,#56+LO]
- ldr r12, [r0,#56+HI]
- adds r9,r5,r9
- str r9, [r0,#48+LO]
- adc r10,r6,r10
- str r10, [r0,#48+HI]
- adds r11,r3,r11
- str r11, [r0,#56+LO]
- adc r12,r4,r12
- str r12, [r0,#56+HI]
-
- add sp,sp,#640
- sub r14,r14,#640
-
- teq r1,r2
- bne .Loop
-
- add sp,sp,#8*9 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.align 4
-.LNEON:
- dmb @ errata #451034 on early Cortex A8
- vstmdb sp!,{d8-d15} @ ABI specification says so
- sub r3,r3,#672 @ K512
- vldmia r0,{d16-d23} @ load context
-.Loop_neon:
- vshr.u64 d24,d20,#14 @ 0
-#if 0<16
- vld1.64 {d0},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d20,#18
- vshr.u64 d26,d20,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vsli.64 d26,d20,#23
-#if 0<16 && defined(__ARMEL__)
- vrev64.8 d0,d0
-#endif
- vadd.i64 d27,d28,d23
- veor d29,d21,d22
- veor d24,d25
- vand d29,d20
- veor d24,d26 @ Sigma1(e)
- veor d29,d22 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d16,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d16,#34
- vshr.u64 d26,d16,#39
- vsli.64 d24,d16,#36
- vsli.64 d25,d16,#30
- vsli.64 d26,d16,#25
- vadd.i64 d27,d0
- vorr d30,d16,d18
- vand d29,d16,d18
- veor d23,d24,d25
- vand d30,d17
- veor d23,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d23,d27
- vadd.i64 d19,d27
- vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 1
-#if 1<16
- vld1.64 {d1},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vsli.64 d26,d19,#23
-#if 1<16 && defined(__ARMEL__)
- vrev64.8 d1,d1
-#endif
- vadd.i64 d27,d28,d22
- veor d29,d20,d21
- veor d24,d25
- vand d29,d19
- veor d24,d26 @ Sigma1(e)
- veor d29,d21 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d23,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d23,#34
- vshr.u64 d26,d23,#39
- vsli.64 d24,d23,#36
- vsli.64 d25,d23,#30
- vsli.64 d26,d23,#25
- vadd.i64 d27,d1
- vorr d30,d23,d17
- vand d29,d23,d17
- veor d22,d24,d25
- vand d30,d16
- veor d22,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d22,d27
- vadd.i64 d18,d27
- vadd.i64 d22,d30
- vshr.u64 d24,d18,#14 @ 2
-#if 2<16
- vld1.64 {d2},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d18,#18
- vshr.u64 d26,d18,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vsli.64 d26,d18,#23
-#if 2<16 && defined(__ARMEL__)
- vrev64.8 d2,d2
-#endif
- vadd.i64 d27,d28,d21
- veor d29,d19,d20
- veor d24,d25
- vand d29,d18
- veor d24,d26 @ Sigma1(e)
- veor d29,d20 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d22,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d22,#34
- vshr.u64 d26,d22,#39
- vsli.64 d24,d22,#36
- vsli.64 d25,d22,#30
- vsli.64 d26,d22,#25
- vadd.i64 d27,d2
- vorr d30,d22,d16
- vand d29,d22,d16
- veor d21,d24,d25
- vand d30,d23
- veor d21,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d21,d27
- vadd.i64 d17,d27
- vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 3
-#if 3<16
- vld1.64 {d3},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vsli.64 d26,d17,#23
-#if 3<16 && defined(__ARMEL__)
- vrev64.8 d3,d3
-#endif
- vadd.i64 d27,d28,d20
- veor d29,d18,d19
- veor d24,d25
- vand d29,d17
- veor d24,d26 @ Sigma1(e)
- veor d29,d19 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d21,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d21,#34
- vshr.u64 d26,d21,#39
- vsli.64 d24,d21,#36
- vsli.64 d25,d21,#30
- vsli.64 d26,d21,#25
- vadd.i64 d27,d3
- vorr d30,d21,d23
- vand d29,d21,d23
- veor d20,d24,d25
- vand d30,d22
- veor d20,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d20,d27
- vadd.i64 d16,d27
- vadd.i64 d20,d30
- vshr.u64 d24,d16,#14 @ 4
-#if 4<16
- vld1.64 {d4},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d16,#18
- vshr.u64 d26,d16,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vsli.64 d26,d16,#23
-#if 4<16 && defined(__ARMEL__)
- vrev64.8 d4,d4
-#endif
- vadd.i64 d27,d28,d19
- veor d29,d17,d18
- veor d24,d25
- vand d29,d16
- veor d24,d26 @ Sigma1(e)
- veor d29,d18 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d20,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d20,#34
- vshr.u64 d26,d20,#39
- vsli.64 d24,d20,#36
- vsli.64 d25,d20,#30
- vsli.64 d26,d20,#25
- vadd.i64 d27,d4
- vorr d30,d20,d22
- vand d29,d20,d22
- veor d19,d24,d25
- vand d30,d21
- veor d19,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d19,d27
- vadd.i64 d23,d27
- vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 5
-#if 5<16
- vld1.64 {d5},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vsli.64 d26,d23,#23
-#if 5<16 && defined(__ARMEL__)
- vrev64.8 d5,d5
-#endif
- vadd.i64 d27,d28,d18
- veor d29,d16,d17
- veor d24,d25
- vand d29,d23
- veor d24,d26 @ Sigma1(e)
- veor d29,d17 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d19,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d19,#34
- vshr.u64 d26,d19,#39
- vsli.64 d24,d19,#36
- vsli.64 d25,d19,#30
- vsli.64 d26,d19,#25
- vadd.i64 d27,d5
- vorr d30,d19,d21
- vand d29,d19,d21
- veor d18,d24,d25
- vand d30,d20
- veor d18,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d18,d27
- vadd.i64 d22,d27
- vadd.i64 d18,d30
- vshr.u64 d24,d22,#14 @ 6
-#if 6<16
- vld1.64 {d6},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d22,#18
- vshr.u64 d26,d22,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vsli.64 d26,d22,#23
-#if 6<16 && defined(__ARMEL__)
- vrev64.8 d6,d6
-#endif
- vadd.i64 d27,d28,d17
- veor d29,d23,d16
- veor d24,d25
- vand d29,d22
- veor d24,d26 @ Sigma1(e)
- veor d29,d16 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d18,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d18,#34
- vshr.u64 d26,d18,#39
- vsli.64 d24,d18,#36
- vsli.64 d25,d18,#30
- vsli.64 d26,d18,#25
- vadd.i64 d27,d6
- vorr d30,d18,d20
- vand d29,d18,d20
- veor d17,d24,d25
- vand d30,d19
- veor d17,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d17,d27
- vadd.i64 d21,d27
- vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 7
-#if 7<16
- vld1.64 {d7},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vsli.64 d26,d21,#23
-#if 7<16 && defined(__ARMEL__)
- vrev64.8 d7,d7
-#endif
- vadd.i64 d27,d28,d16
- veor d29,d22,d23
- veor d24,d25
- vand d29,d21
- veor d24,d26 @ Sigma1(e)
- veor d29,d23 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d17,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d17,#34
- vshr.u64 d26,d17,#39
- vsli.64 d24,d17,#36
- vsli.64 d25,d17,#30
- vsli.64 d26,d17,#25
- vadd.i64 d27,d7
- vorr d30,d17,d19
- vand d29,d17,d19
- veor d16,d24,d25
- vand d30,d18
- veor d16,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d16,d27
- vadd.i64 d20,d27
- vadd.i64 d16,d30
- vshr.u64 d24,d20,#14 @ 8
-#if 8<16
- vld1.64 {d8},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d20,#18
- vshr.u64 d26,d20,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vsli.64 d26,d20,#23
-#if 8<16 && defined(__ARMEL__)
- vrev64.8 d8,d8
-#endif
- vadd.i64 d27,d28,d23
- veor d29,d21,d22
- veor d24,d25
- vand d29,d20
- veor d24,d26 @ Sigma1(e)
- veor d29,d22 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d16,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d16,#34
- vshr.u64 d26,d16,#39
- vsli.64 d24,d16,#36
- vsli.64 d25,d16,#30
- vsli.64 d26,d16,#25
- vadd.i64 d27,d8
- vorr d30,d16,d18
- vand d29,d16,d18
- veor d23,d24,d25
- vand d30,d17
- veor d23,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d23,d27
- vadd.i64 d19,d27
- vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 9
-#if 9<16
- vld1.64 {d9},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vsli.64 d26,d19,#23
-#if 9<16 && defined(__ARMEL__)
- vrev64.8 d9,d9
-#endif
- vadd.i64 d27,d28,d22
- veor d29,d20,d21
- veor d24,d25
- vand d29,d19
- veor d24,d26 @ Sigma1(e)
- veor d29,d21 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d23,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d23,#34
- vshr.u64 d26,d23,#39
- vsli.64 d24,d23,#36
- vsli.64 d25,d23,#30
- vsli.64 d26,d23,#25
- vadd.i64 d27,d9
- vorr d30,d23,d17
- vand d29,d23,d17
- veor d22,d24,d25
- vand d30,d16
- veor d22,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d22,d27
- vadd.i64 d18,d27
- vadd.i64 d22,d30
- vshr.u64 d24,d18,#14 @ 10
-#if 10<16
- vld1.64 {d10},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d18,#18
- vshr.u64 d26,d18,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vsli.64 d26,d18,#23
-#if 10<16 && defined(__ARMEL__)
- vrev64.8 d10,d10
-#endif
- vadd.i64 d27,d28,d21
- veor d29,d19,d20
- veor d24,d25
- vand d29,d18
- veor d24,d26 @ Sigma1(e)
- veor d29,d20 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d22,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d22,#34
- vshr.u64 d26,d22,#39
- vsli.64 d24,d22,#36
- vsli.64 d25,d22,#30
- vsli.64 d26,d22,#25
- vadd.i64 d27,d10
- vorr d30,d22,d16
- vand d29,d22,d16
- veor d21,d24,d25
- vand d30,d23
- veor d21,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d21,d27
- vadd.i64 d17,d27
- vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 11
-#if 11<16
- vld1.64 {d11},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vsli.64 d26,d17,#23
-#if 11<16 && defined(__ARMEL__)
- vrev64.8 d11,d11
-#endif
- vadd.i64 d27,d28,d20
- veor d29,d18,d19
- veor d24,d25
- vand d29,d17
- veor d24,d26 @ Sigma1(e)
- veor d29,d19 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d21,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d21,#34
- vshr.u64 d26,d21,#39
- vsli.64 d24,d21,#36
- vsli.64 d25,d21,#30
- vsli.64 d26,d21,#25
- vadd.i64 d27,d11
- vorr d30,d21,d23
- vand d29,d21,d23
- veor d20,d24,d25
- vand d30,d22
- veor d20,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d20,d27
- vadd.i64 d16,d27
- vadd.i64 d20,d30
- vshr.u64 d24,d16,#14 @ 12
-#if 12<16
- vld1.64 {d12},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d16,#18
- vshr.u64 d26,d16,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vsli.64 d26,d16,#23
-#if 12<16 && defined(__ARMEL__)
- vrev64.8 d12,d12
-#endif
- vadd.i64 d27,d28,d19
- veor d29,d17,d18
- veor d24,d25
- vand d29,d16
- veor d24,d26 @ Sigma1(e)
- veor d29,d18 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d20,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d20,#34
- vshr.u64 d26,d20,#39
- vsli.64 d24,d20,#36
- vsli.64 d25,d20,#30
- vsli.64 d26,d20,#25
- vadd.i64 d27,d12
- vorr d30,d20,d22
- vand d29,d20,d22
- veor d19,d24,d25
- vand d30,d21
- veor d19,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d19,d27
- vadd.i64 d23,d27
- vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 13
-#if 13<16
- vld1.64 {d13},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vsli.64 d26,d23,#23
-#if 13<16 && defined(__ARMEL__)
- vrev64.8 d13,d13
-#endif
- vadd.i64 d27,d28,d18
- veor d29,d16,d17
- veor d24,d25
- vand d29,d23
- veor d24,d26 @ Sigma1(e)
- veor d29,d17 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d19,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d19,#34
- vshr.u64 d26,d19,#39
- vsli.64 d24,d19,#36
- vsli.64 d25,d19,#30
- vsli.64 d26,d19,#25
- vadd.i64 d27,d13
- vorr d30,d19,d21
- vand d29,d19,d21
- veor d18,d24,d25
- vand d30,d20
- veor d18,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d18,d27
- vadd.i64 d22,d27
- vadd.i64 d18,d30
- vshr.u64 d24,d22,#14 @ 14
-#if 14<16
- vld1.64 {d14},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d22,#18
- vshr.u64 d26,d22,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vsli.64 d26,d22,#23
-#if 14<16 && defined(__ARMEL__)
- vrev64.8 d14,d14
-#endif
- vadd.i64 d27,d28,d17
- veor d29,d23,d16
- veor d24,d25
- vand d29,d22
- veor d24,d26 @ Sigma1(e)
- veor d29,d16 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d18,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d18,#34
- vshr.u64 d26,d18,#39
- vsli.64 d24,d18,#36
- vsli.64 d25,d18,#30
- vsli.64 d26,d18,#25
- vadd.i64 d27,d14
- vorr d30,d18,d20
- vand d29,d18,d20
- veor d17,d24,d25
- vand d30,d19
- veor d17,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d17,d27
- vadd.i64 d21,d27
- vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 15
-#if 15<16
- vld1.64 {d15},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vsli.64 d26,d21,#23
-#if 15<16 && defined(__ARMEL__)
- vrev64.8 d15,d15
-#endif
- vadd.i64 d27,d28,d16
- veor d29,d22,d23
- veor d24,d25
- vand d29,d21
- veor d24,d26 @ Sigma1(e)
- veor d29,d23 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d17,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d17,#34
- vshr.u64 d26,d17,#39
- vsli.64 d24,d17,#36
- vsli.64 d25,d17,#30
- vsli.64 d26,d17,#25
- vadd.i64 d27,d15
- vorr d30,d17,d19
- vand d29,d17,d19
- veor d16,d24,d25
- vand d30,d18
- veor d16,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d16,d27
- vadd.i64 d20,d27
- vadd.i64 d16,d30
- mov r12,#4
-.L16_79_neon:
- subs r12,#1
- vshr.u64 q12,q7,#19
- vshr.u64 q13,q7,#61
- vshr.u64 q15,q7,#6
- vsli.64 q12,q7,#45
- vext.8 q14,q0,q1,#8 @ X[i+1]
- vsli.64 q13,q7,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q0,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q4,q5,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d20,#14 @ from NEON_00_15
- vadd.i64 q0,q14
- vshr.u64 d25,d20,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d20,#41 @ from NEON_00_15
- vadd.i64 q0,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vsli.64 d26,d20,#23
-#if 16<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d23
- veor d29,d21,d22
- veor d24,d25
- vand d29,d20
- veor d24,d26 @ Sigma1(e)
- veor d29,d22 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d16,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d16,#34
- vshr.u64 d26,d16,#39
- vsli.64 d24,d16,#36
- vsli.64 d25,d16,#30
- vsli.64 d26,d16,#25
- vadd.i64 d27,d0
- vorr d30,d16,d18
- vand d29,d16,d18
- veor d23,d24,d25
- vand d30,d17
- veor d23,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d23,d27
- vadd.i64 d19,d27
- vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 17
-#if 17<16
- vld1.64 {d1},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vsli.64 d26,d19,#23
-#if 17<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d22
- veor d29,d20,d21
- veor d24,d25
- vand d29,d19
- veor d24,d26 @ Sigma1(e)
- veor d29,d21 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d23,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d23,#34
- vshr.u64 d26,d23,#39
- vsli.64 d24,d23,#36
- vsli.64 d25,d23,#30
- vsli.64 d26,d23,#25
- vadd.i64 d27,d1
- vorr d30,d23,d17
- vand d29,d23,d17
- veor d22,d24,d25
- vand d30,d16
- veor d22,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d22,d27
- vadd.i64 d18,d27
- vadd.i64 d22,d30
- vshr.u64 q12,q0,#19
- vshr.u64 q13,q0,#61
- vshr.u64 q15,q0,#6
- vsli.64 q12,q0,#45
- vext.8 q14,q1,q2,#8 @ X[i+1]
- vsli.64 q13,q0,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q1,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q5,q6,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d18,#14 @ from NEON_00_15
- vadd.i64 q1,q14
- vshr.u64 d25,d18,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d18,#41 @ from NEON_00_15
- vadd.i64 q1,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vsli.64 d26,d18,#23
-#if 18<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d21
- veor d29,d19,d20
- veor d24,d25
- vand d29,d18
- veor d24,d26 @ Sigma1(e)
- veor d29,d20 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d22,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d22,#34
- vshr.u64 d26,d22,#39
- vsli.64 d24,d22,#36
- vsli.64 d25,d22,#30
- vsli.64 d26,d22,#25
- vadd.i64 d27,d2
- vorr d30,d22,d16
- vand d29,d22,d16
- veor d21,d24,d25
- vand d30,d23
- veor d21,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d21,d27
- vadd.i64 d17,d27
- vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 19
-#if 19<16
- vld1.64 {d3},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vsli.64 d26,d17,#23
-#if 19<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d20
- veor d29,d18,d19
- veor d24,d25
- vand d29,d17
- veor d24,d26 @ Sigma1(e)
- veor d29,d19 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d21,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d21,#34
- vshr.u64 d26,d21,#39
- vsli.64 d24,d21,#36
- vsli.64 d25,d21,#30
- vsli.64 d26,d21,#25
- vadd.i64 d27,d3
- vorr d30,d21,d23
- vand d29,d21,d23
- veor d20,d24,d25
- vand d30,d22
- veor d20,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d20,d27
- vadd.i64 d16,d27
- vadd.i64 d20,d30
- vshr.u64 q12,q1,#19
- vshr.u64 q13,q1,#61
- vshr.u64 q15,q1,#6
- vsli.64 q12,q1,#45
- vext.8 q14,q2,q3,#8 @ X[i+1]
- vsli.64 q13,q1,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q2,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q6,q7,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d16,#14 @ from NEON_00_15
- vadd.i64 q2,q14
- vshr.u64 d25,d16,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d16,#41 @ from NEON_00_15
- vadd.i64 q2,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vsli.64 d26,d16,#23
-#if 20<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d19
- veor d29,d17,d18
- veor d24,d25
- vand d29,d16
- veor d24,d26 @ Sigma1(e)
- veor d29,d18 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d20,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d20,#34
- vshr.u64 d26,d20,#39
- vsli.64 d24,d20,#36
- vsli.64 d25,d20,#30
- vsli.64 d26,d20,#25
- vadd.i64 d27,d4
- vorr d30,d20,d22
- vand d29,d20,d22
- veor d19,d24,d25
- vand d30,d21
- veor d19,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d19,d27
- vadd.i64 d23,d27
- vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 21
-#if 21<16
- vld1.64 {d5},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vsli.64 d26,d23,#23
-#if 21<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d18
- veor d29,d16,d17
- veor d24,d25
- vand d29,d23
- veor d24,d26 @ Sigma1(e)
- veor d29,d17 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d19,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d19,#34
- vshr.u64 d26,d19,#39
- vsli.64 d24,d19,#36
- vsli.64 d25,d19,#30
- vsli.64 d26,d19,#25
- vadd.i64 d27,d5
- vorr d30,d19,d21
- vand d29,d19,d21
- veor d18,d24,d25
- vand d30,d20
- veor d18,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d18,d27
- vadd.i64 d22,d27
- vadd.i64 d18,d30
- vshr.u64 q12,q2,#19
- vshr.u64 q13,q2,#61
- vshr.u64 q15,q2,#6
- vsli.64 q12,q2,#45
- vext.8 q14,q3,q4,#8 @ X[i+1]
- vsli.64 q13,q2,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q3,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q7,q0,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d22,#14 @ from NEON_00_15
- vadd.i64 q3,q14
- vshr.u64 d25,d22,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d22,#41 @ from NEON_00_15
- vadd.i64 q3,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vsli.64 d26,d22,#23
-#if 22<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d17
- veor d29,d23,d16
- veor d24,d25
- vand d29,d22
- veor d24,d26 @ Sigma1(e)
- veor d29,d16 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d18,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d18,#34
- vshr.u64 d26,d18,#39
- vsli.64 d24,d18,#36
- vsli.64 d25,d18,#30
- vsli.64 d26,d18,#25
- vadd.i64 d27,d6
- vorr d30,d18,d20
- vand d29,d18,d20
- veor d17,d24,d25
- vand d30,d19
- veor d17,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d17,d27
- vadd.i64 d21,d27
- vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 23
-#if 23<16
- vld1.64 {d7},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vsli.64 d26,d21,#23
-#if 23<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d16
- veor d29,d22,d23
- veor d24,d25
- vand d29,d21
- veor d24,d26 @ Sigma1(e)
- veor d29,d23 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d17,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d17,#34
- vshr.u64 d26,d17,#39
- vsli.64 d24,d17,#36
- vsli.64 d25,d17,#30
- vsli.64 d26,d17,#25
- vadd.i64 d27,d7
- vorr d30,d17,d19
- vand d29,d17,d19
- veor d16,d24,d25
- vand d30,d18
- veor d16,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d16,d27
- vadd.i64 d20,d27
- vadd.i64 d16,d30
- vshr.u64 q12,q3,#19
- vshr.u64 q13,q3,#61
- vshr.u64 q15,q3,#6
- vsli.64 q12,q3,#45
- vext.8 q14,q4,q5,#8 @ X[i+1]
- vsli.64 q13,q3,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q4,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q0,q1,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d20,#14 @ from NEON_00_15
- vadd.i64 q4,q14
- vshr.u64 d25,d20,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d20,#41 @ from NEON_00_15
- vadd.i64 q4,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vsli.64 d26,d20,#23
-#if 24<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d23
- veor d29,d21,d22
- veor d24,d25
- vand d29,d20
- veor d24,d26 @ Sigma1(e)
- veor d29,d22 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d16,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d16,#34
- vshr.u64 d26,d16,#39
- vsli.64 d24,d16,#36
- vsli.64 d25,d16,#30
- vsli.64 d26,d16,#25
- vadd.i64 d27,d8
- vorr d30,d16,d18
- vand d29,d16,d18
- veor d23,d24,d25
- vand d30,d17
- veor d23,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d23,d27
- vadd.i64 d19,d27
- vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 25
-#if 25<16
- vld1.64 {d9},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vsli.64 d26,d19,#23
-#if 25<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d22
- veor d29,d20,d21
- veor d24,d25
- vand d29,d19
- veor d24,d26 @ Sigma1(e)
- veor d29,d21 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d23,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d23,#34
- vshr.u64 d26,d23,#39
- vsli.64 d24,d23,#36
- vsli.64 d25,d23,#30
- vsli.64 d26,d23,#25
- vadd.i64 d27,d9
- vorr d30,d23,d17
- vand d29,d23,d17
- veor d22,d24,d25
- vand d30,d16
- veor d22,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d22,d27
- vadd.i64 d18,d27
- vadd.i64 d22,d30
- vshr.u64 q12,q4,#19
- vshr.u64 q13,q4,#61
- vshr.u64 q15,q4,#6
- vsli.64 q12,q4,#45
- vext.8 q14,q5,q6,#8 @ X[i+1]
- vsli.64 q13,q4,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q5,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q1,q2,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d18,#14 @ from NEON_00_15
- vadd.i64 q5,q14
- vshr.u64 d25,d18,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d18,#41 @ from NEON_00_15
- vadd.i64 q5,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vsli.64 d26,d18,#23
-#if 26<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d21
- veor d29,d19,d20
- veor d24,d25
- vand d29,d18
- veor d24,d26 @ Sigma1(e)
- veor d29,d20 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d22,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d22,#34
- vshr.u64 d26,d22,#39
- vsli.64 d24,d22,#36
- vsli.64 d25,d22,#30
- vsli.64 d26,d22,#25
- vadd.i64 d27,d10
- vorr d30,d22,d16
- vand d29,d22,d16
- veor d21,d24,d25
- vand d30,d23
- veor d21,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d21,d27
- vadd.i64 d17,d27
- vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 27
-#if 27<16
- vld1.64 {d11},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vsli.64 d26,d17,#23
-#if 27<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d20
- veor d29,d18,d19
- veor d24,d25
- vand d29,d17
- veor d24,d26 @ Sigma1(e)
- veor d29,d19 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d21,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d21,#34
- vshr.u64 d26,d21,#39
- vsli.64 d24,d21,#36
- vsli.64 d25,d21,#30
- vsli.64 d26,d21,#25
- vadd.i64 d27,d11
- vorr d30,d21,d23
- vand d29,d21,d23
- veor d20,d24,d25
- vand d30,d22
- veor d20,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d20,d27
- vadd.i64 d16,d27
- vadd.i64 d20,d30
- vshr.u64 q12,q5,#19
- vshr.u64 q13,q5,#61
- vshr.u64 q15,q5,#6
- vsli.64 q12,q5,#45
- vext.8 q14,q6,q7,#8 @ X[i+1]
- vsli.64 q13,q5,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q6,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q2,q3,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d16,#14 @ from NEON_00_15
- vadd.i64 q6,q14
- vshr.u64 d25,d16,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d16,#41 @ from NEON_00_15
- vadd.i64 q6,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vsli.64 d26,d16,#23
-#if 28<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d19
- veor d29,d17,d18
- veor d24,d25
- vand d29,d16
- veor d24,d26 @ Sigma1(e)
- veor d29,d18 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d20,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d20,#34
- vshr.u64 d26,d20,#39
- vsli.64 d24,d20,#36
- vsli.64 d25,d20,#30
- vsli.64 d26,d20,#25
- vadd.i64 d27,d12
- vorr d30,d20,d22
- vand d29,d20,d22
- veor d19,d24,d25
- vand d30,d21
- veor d19,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d19,d27
- vadd.i64 d23,d27
- vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 29
-#if 29<16
- vld1.64 {d13},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vsli.64 d26,d23,#23
-#if 29<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d18
- veor d29,d16,d17
- veor d24,d25
- vand d29,d23
- veor d24,d26 @ Sigma1(e)
- veor d29,d17 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d19,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d19,#34
- vshr.u64 d26,d19,#39
- vsli.64 d24,d19,#36
- vsli.64 d25,d19,#30
- vsli.64 d26,d19,#25
- vadd.i64 d27,d13
- vorr d30,d19,d21
- vand d29,d19,d21
- veor d18,d24,d25
- vand d30,d20
- veor d18,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d18,d27
- vadd.i64 d22,d27
- vadd.i64 d18,d30
- vshr.u64 q12,q6,#19
- vshr.u64 q13,q6,#61
- vshr.u64 q15,q6,#6
- vsli.64 q12,q6,#45
- vext.8 q14,q7,q0,#8 @ X[i+1]
- vsli.64 q13,q6,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q7,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q3,q4,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d22,#14 @ from NEON_00_15
- vadd.i64 q7,q14
- vshr.u64 d25,d22,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d22,#41 @ from NEON_00_15
- vadd.i64 q7,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vsli.64 d26,d22,#23
-#if 30<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d17
- veor d29,d23,d16
- veor d24,d25
- vand d29,d22
- veor d24,d26 @ Sigma1(e)
- veor d29,d16 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d18,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d18,#34
- vshr.u64 d26,d18,#39
- vsli.64 d24,d18,#36
- vsli.64 d25,d18,#30
- vsli.64 d26,d18,#25
- vadd.i64 d27,d14
- vorr d30,d18,d20
- vand d29,d18,d20
- veor d17,d24,d25
- vand d30,d19
- veor d17,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d17,d27
- vadd.i64 d21,d27
- vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 31
-#if 31<16
- vld1.64 {d15},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vsli.64 d26,d21,#23
-#if 31<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- vadd.i64 d27,d28,d16
- veor d29,d22,d23
- veor d24,d25
- vand d29,d21
- veor d24,d26 @ Sigma1(e)
- veor d29,d23 @ Ch(e,f,g)
- vadd.i64 d27,d24
- vshr.u64 d24,d17,#28
- vadd.i64 d27,d29
- vshr.u64 d25,d17,#34
- vshr.u64 d26,d17,#39
- vsli.64 d24,d17,#36
- vsli.64 d25,d17,#30
- vsli.64 d26,d17,#25
- vadd.i64 d27,d15
- vorr d30,d17,d19
- vand d29,d17,d19
- veor d16,d24,d25
- vand d30,d18
- veor d16,d26 @ Sigma0(a)
- vorr d30,d29 @ Maj(a,b,c)
- vadd.i64 d16,d27
- vadd.i64 d20,d27
- vadd.i64 d16,d30
- bne .L16_79_neon
-
- vldmia r0,{d24-d31} @ load context to temp
- vadd.i64 q8,q12 @ vectorized accumulate
- vadd.i64 q9,q13
- vadd.i64 q10,q14
- vadd.i64 q11,q15
- vstmia r0,{d16-d23} @ save context
- teq r1,r2
- sub r3,#640 @ rewind K512
- bne .Loop_neon
-
- vldmia sp!,{d8-d15} @ epilogue
- bx lr @ .word 0xe12fff1e
-#endif
-.size sha512_block_data_order,.-sha512_block_data_order
-.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
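Both the integer and NEON paths of this deleted file implement the standard
SHA-512 round; the running comments (T += Sigma1(e), T += Ch(e,f,g),
T += K[i], d += T, h += T) track it term by term. For reference, a compact
C sketch of one round on plain 64-bit values (the sha512_round helper name
is hypothetical, not an OpenSSL API):

    #include <stdint.h>

    static uint64_t ROTR64(uint64_t x, int n) { return (x >> n) | (x << (64 - n)); }

    /* One round: S[0..7] hold a..h, Ki is K512[i], Wi the message schedule word. */
    static void sha512_round(uint64_t S[8], uint64_t Ki, uint64_t Wi)
    {
        uint64_t a = S[0], b = S[1], c = S[2], d = S[3];
        uint64_t e = S[4], f = S[5], g = S[6], h = S[7];
        uint64_t T1 = h
                    + (ROTR64(e, 14) ^ ROTR64(e, 18) ^ ROTR64(e, 41)) /* Sigma1(e) */
                    + ((e & f) ^ (~e & g))                            /* Ch(e,f,g) */
                    + Ki + Wi;
        uint64_t T2 = (ROTR64(a, 28) ^ ROTR64(a, 34) ^ ROTR64(a, 39)) /* Sigma0(a) */
                    + ((a & b) ^ (a & c) ^ (b & c));                  /* Maj(a,b,c) */
        S[7] = g; S[6] = f; S[5] = e; S[4] = d + T1;   /* e' = d + T1  */
        S[3] = c; S[2] = b; S[1] = a; S[0] = T1 + T2;  /* a' = T1 + T2 */
    }

The assembly computes Ch as ((f^g)&e)^g and Maj as ((a|b)&c)|(a&b), which
are register-saving identities of the expressions above.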
diff --git a/app/openssl/crypto/sha/asm/sha512-armv4.pl b/app/openssl/crypto/sha/asm/sha512-armv4.pl
deleted file mode 100644
index 71aa9356..00000000
--- a/app/openssl/crypto/sha/asm/sha512-armv4.pl
+++ /dev/null
@@ -1,583 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA512 block procedure for ARMv4. September 2007.
-
-# This code is ~4.5 (four and a half) times faster than code generated
-# by gcc 3.4 and spends ~72 clock cycles per byte [on a single-issue
-# XScale PXA250 core].
-#
-# July 2010.
-#
-# Rescheduling for dual-issue pipeline resulted in 6% improvement on
-# Cortex A8 core and ~40 cycles per processed byte.
-
-# February 2011.
-#
-# Profiler-assisted and platform-specific optimization resulted in 7%
-# improvement on Cortex A8 core and ~38 cycles per byte.
-
-# March 2011.
-#
-# Add NEON implementation. On Cortex A8 it was measured to process
-# one byte in 25.5 cycles or 47% faster than integer-only code.
-
-# Byte order [in]dependence. =========================================
-#
-# Originally the caller was expected to maintain a specific *dword* order
-# in h[0-7], namely with the most significant dword at the *lower* address,
-# which was reflected in the two parameters below as 0 and 4. Now the
-# caller is expected to maintain native byte order for whole 64-bit values.
-$hi="HI";
-$lo="LO";
-# ====================================================================
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-$ctx="r0"; # parameter block
-$inp="r1";
-$len="r2";
-
-$Tlo="r3";
-$Thi="r4";
-$Alo="r5";
-$Ahi="r6";
-$Elo="r7";
-$Ehi="r8";
-$t0="r9";
-$t1="r10";
-$t2="r11";
-$t3="r12";
-############ r13 is stack pointer
-$Ktbl="r14";
-############ r15 is program counter
-
-$Aoff=8*0;
-$Boff=8*1;
-$Coff=8*2;
-$Doff=8*3;
-$Eoff=8*4;
-$Foff=8*5;
-$Goff=8*6;
-$Hoff=8*7;
-$Xoff=8*8;
-
-sub BODY_00_15() {
-my $magic = shift;
-$code.=<<___;
- @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- mov $t0,$Elo,lsr#14
- str $Tlo,[sp,#$Xoff+0]
- mov $t1,$Ehi,lsr#14
- str $Thi,[sp,#$Xoff+4]
- eor $t0,$t0,$Ehi,lsl#18
- ldr $t2,[sp,#$Hoff+0] @ h.lo
- eor $t1,$t1,$Elo,lsl#18
- ldr $t3,[sp,#$Hoff+4] @ h.hi
- eor $t0,$t0,$Elo,lsr#18
- eor $t1,$t1,$Ehi,lsr#18
- eor $t0,$t0,$Ehi,lsl#14
- eor $t1,$t1,$Elo,lsl#14
- eor $t0,$t0,$Ehi,lsr#9
- eor $t1,$t1,$Elo,lsr#9
- eor $t0,$t0,$Elo,lsl#23
- eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
- adds $Tlo,$Tlo,$t0
- ldr $t0,[sp,#$Foff+0] @ f.lo
- adc $Thi,$Thi,$t1 @ T += Sigma1(e)
- ldr $t1,[sp,#$Foff+4] @ f.hi
- adds $Tlo,$Tlo,$t2
- ldr $t2,[sp,#$Goff+0] @ g.lo
- adc $Thi,$Thi,$t3 @ T += h
- ldr $t3,[sp,#$Goff+4] @ g.hi
-
- eor $t0,$t0,$t2
- str $Elo,[sp,#$Eoff+0]
- eor $t1,$t1,$t3
- str $Ehi,[sp,#$Eoff+4]
- and $t0,$t0,$Elo
- str $Alo,[sp,#$Aoff+0]
- and $t1,$t1,$Ehi
- str $Ahi,[sp,#$Aoff+4]
- eor $t0,$t0,$t2
- ldr $t2,[$Ktbl,#$lo] @ K[i].lo
- eor $t1,$t1,$t3 @ Ch(e,f,g)
- ldr $t3,[$Ktbl,#$hi] @ K[i].hi
-
- adds $Tlo,$Tlo,$t0
- ldr $Elo,[sp,#$Doff+0] @ d.lo
- adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
- ldr $Ehi,[sp,#$Doff+4] @ d.hi
- adds $Tlo,$Tlo,$t2
- and $t0,$t2,#0xff
- adc $Thi,$Thi,$t3 @ T += K[i]
- adds $Elo,$Elo,$Tlo
- ldr $t2,[sp,#$Boff+0] @ b.lo
- adc $Ehi,$Ehi,$Thi @ d += T
- teq $t0,#$magic
-
- ldr $t3,[sp,#$Coff+0] @ c.lo
- orreq $Ktbl,$Ktbl,#1
- @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- mov $t0,$Alo,lsr#28
- mov $t1,$Ahi,lsr#28
- eor $t0,$t0,$Ahi,lsl#4
- eor $t1,$t1,$Alo,lsl#4
- eor $t0,$t0,$Ahi,lsr#2
- eor $t1,$t1,$Alo,lsr#2
- eor $t0,$t0,$Alo,lsl#30
- eor $t1,$t1,$Ahi,lsl#30
- eor $t0,$t0,$Ahi,lsr#7
- eor $t1,$t1,$Alo,lsr#7
- eor $t0,$t0,$Alo,lsl#25
- eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
- adds $Tlo,$Tlo,$t0
- and $t0,$Alo,$t2
- adc $Thi,$Thi,$t1 @ T += Sigma0(a)
-
- ldr $t1,[sp,#$Boff+4] @ b.hi
- orr $Alo,$Alo,$t2
- ldr $t2,[sp,#$Coff+4] @ c.hi
- and $Alo,$Alo,$t3
- and $t3,$Ahi,$t1
- orr $Ahi,$Ahi,$t1
- orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
- and $Ahi,$Ahi,$t2
- adds $Alo,$Alo,$Tlo
- orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
- sub sp,sp,#8
- adc $Ahi,$Ahi,$Thi @ h += T
- tst $Ktbl,#1
- add $Ktbl,$Ktbl,#8
-___
-}
-$code=<<___;
-#include "arm_arch.h"
-#ifdef __ARMEL__
-# define LO 0
-# define HI 4
-# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
-#else
-# define HI 0
-# define LO 4
-# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
-#endif
-
-.text
-.code 32
-.type K512,%object
-.align 5
-K512:
-WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
-WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
-WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
-WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
-WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
-WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
-WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
-WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
-WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
-WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
-WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
-WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
-WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
-WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
-WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
-WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
-WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
-WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
-WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
-WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
-WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
-WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
-WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
-WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
-WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
-WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
-WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
-WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
-WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
-WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
-WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
-WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
-WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
-WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
-WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
-WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
-WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
-WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
-WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
-WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
-.size K512,.-K512
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha512_block_data_order
-.skip 32-4
-
-.global sha512_block_data_order
-.type sha512_block_data_order,%function
-sha512_block_data_order:
- sub r3,pc,#8 @ sha512_block_data_order
- add $len,$inp,$len,lsl#7 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#1
- bne .LNEON
-#endif
- stmdb sp!,{r4-r12,lr}
- sub $Ktbl,r3,#672 @ K512
- sub sp,sp,#9*8
-
- ldr $Elo,[$ctx,#$Eoff+$lo]
- ldr $Ehi,[$ctx,#$Eoff+$hi]
- ldr $t0, [$ctx,#$Goff+$lo]
- ldr $t1, [$ctx,#$Goff+$hi]
- ldr $t2, [$ctx,#$Hoff+$lo]
- ldr $t3, [$ctx,#$Hoff+$hi]
-.Loop:
- str $t0, [sp,#$Goff+0]
- str $t1, [sp,#$Goff+4]
- str $t2, [sp,#$Hoff+0]
- str $t3, [sp,#$Hoff+4]
- ldr $Alo,[$ctx,#$Aoff+$lo]
- ldr $Ahi,[$ctx,#$Aoff+$hi]
- ldr $Tlo,[$ctx,#$Boff+$lo]
- ldr $Thi,[$ctx,#$Boff+$hi]
- ldr $t0, [$ctx,#$Coff+$lo]
- ldr $t1, [$ctx,#$Coff+$hi]
- ldr $t2, [$ctx,#$Doff+$lo]
- ldr $t3, [$ctx,#$Doff+$hi]
- str $Tlo,[sp,#$Boff+0]
- str $Thi,[sp,#$Boff+4]
- str $t0, [sp,#$Coff+0]
- str $t1, [sp,#$Coff+4]
- str $t2, [sp,#$Doff+0]
- str $t3, [sp,#$Doff+4]
- ldr $Tlo,[$ctx,#$Foff+$lo]
- ldr $Thi,[$ctx,#$Foff+$hi]
- str $Tlo,[sp,#$Foff+0]
- str $Thi,[sp,#$Foff+4]
-
-.L00_15:
-#if __ARM_ARCH__<7
- ldrb $Tlo,[$inp,#7]
- ldrb $t0, [$inp,#6]
- ldrb $t1, [$inp,#5]
- ldrb $t2, [$inp,#4]
- ldrb $Thi,[$inp,#3]
- ldrb $t3, [$inp,#2]
- orr $Tlo,$Tlo,$t0,lsl#8
- ldrb $t0, [$inp,#1]
- orr $Tlo,$Tlo,$t1,lsl#16
- ldrb $t1, [$inp],#8
- orr $Tlo,$Tlo,$t2,lsl#24
- orr $Thi,$Thi,$t3,lsl#8
- orr $Thi,$Thi,$t0,lsl#16
- orr $Thi,$Thi,$t1,lsl#24
-#else
- ldr $Tlo,[$inp,#4]
- ldr $Thi,[$inp],#8
-#ifdef __ARMEL__
- rev $Tlo,$Tlo
- rev $Thi,$Thi
-#endif
-#endif
-___
- &BODY_00_15(0x94);
-$code.=<<___;
- tst $Ktbl,#1
- beq .L00_15
- ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
- ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
- bic $Ktbl,$Ktbl,#1
-.L16_79:
- @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
- @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
- @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
- mov $Tlo,$t0,lsr#1
- ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
- mov $Thi,$t1,lsr#1
- ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
- eor $Tlo,$Tlo,$t1,lsl#31
- eor $Thi,$Thi,$t0,lsl#31
- eor $Tlo,$Tlo,$t0,lsr#8
- eor $Thi,$Thi,$t1,lsr#8
- eor $Tlo,$Tlo,$t1,lsl#24
- eor $Thi,$Thi,$t0,lsl#24
- eor $Tlo,$Tlo,$t0,lsr#7
- eor $Thi,$Thi,$t1,lsr#7
- eor $Tlo,$Tlo,$t1,lsl#25
-
- @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
- @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
- @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
- mov $t0,$t2,lsr#19
- mov $t1,$t3,lsr#19
- eor $t0,$t0,$t3,lsl#13
- eor $t1,$t1,$t2,lsl#13
- eor $t0,$t0,$t3,lsr#29
- eor $t1,$t1,$t2,lsr#29
- eor $t0,$t0,$t2,lsl#3
- eor $t1,$t1,$t3,lsl#3
- eor $t0,$t0,$t2,lsr#6
- eor $t1,$t1,$t3,lsr#6
- ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
- eor $t0,$t0,$t3,lsl#26
-
- ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
- adds $Tlo,$Tlo,$t0
- ldr $t0,[sp,#`$Xoff+8*16`+0]
- adc $Thi,$Thi,$t1
-
- ldr $t1,[sp,#`$Xoff+8*16`+4]
- adds $Tlo,$Tlo,$t2
- adc $Thi,$Thi,$t3
- adds $Tlo,$Tlo,$t0
- adc $Thi,$Thi,$t1
-___
- &BODY_00_15(0x17);
-$code.=<<___;
- ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
- ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
- beq .L16_79
- bic $Ktbl,$Ktbl,#1
-
- ldr $Tlo,[sp,#$Boff+0]
- ldr $Thi,[sp,#$Boff+4]
- ldr $t0, [$ctx,#$Aoff+$lo]
- ldr $t1, [$ctx,#$Aoff+$hi]
- ldr $t2, [$ctx,#$Boff+$lo]
- ldr $t3, [$ctx,#$Boff+$hi]
- adds $t0,$Alo,$t0
- str $t0, [$ctx,#$Aoff+$lo]
- adc $t1,$Ahi,$t1
- str $t1, [$ctx,#$Aoff+$hi]
- adds $t2,$Tlo,$t2
- str $t2, [$ctx,#$Boff+$lo]
- adc $t3,$Thi,$t3
- str $t3, [$ctx,#$Boff+$hi]
-
- ldr $Alo,[sp,#$Coff+0]
- ldr $Ahi,[sp,#$Coff+4]
- ldr $Tlo,[sp,#$Doff+0]
- ldr $Thi,[sp,#$Doff+4]
- ldr $t0, [$ctx,#$Coff+$lo]
- ldr $t1, [$ctx,#$Coff+$hi]
- ldr $t2, [$ctx,#$Doff+$lo]
- ldr $t3, [$ctx,#$Doff+$hi]
- adds $t0,$Alo,$t0
- str $t0, [$ctx,#$Coff+$lo]
- adc $t1,$Ahi,$t1
- str $t1, [$ctx,#$Coff+$hi]
- adds $t2,$Tlo,$t2
- str $t2, [$ctx,#$Doff+$lo]
- adc $t3,$Thi,$t3
- str $t3, [$ctx,#$Doff+$hi]
-
- ldr $Tlo,[sp,#$Foff+0]
- ldr $Thi,[sp,#$Foff+4]
- ldr $t0, [$ctx,#$Eoff+$lo]
- ldr $t1, [$ctx,#$Eoff+$hi]
- ldr $t2, [$ctx,#$Foff+$lo]
- ldr $t3, [$ctx,#$Foff+$hi]
- adds $Elo,$Elo,$t0
- str $Elo,[$ctx,#$Eoff+$lo]
- adc $Ehi,$Ehi,$t1
- str $Ehi,[$ctx,#$Eoff+$hi]
- adds $t2,$Tlo,$t2
- str $t2, [$ctx,#$Foff+$lo]
- adc $t3,$Thi,$t3
- str $t3, [$ctx,#$Foff+$hi]
-
- ldr $Alo,[sp,#$Goff+0]
- ldr $Ahi,[sp,#$Goff+4]
- ldr $Tlo,[sp,#$Hoff+0]
- ldr $Thi,[sp,#$Hoff+4]
- ldr $t0, [$ctx,#$Goff+$lo]
- ldr $t1, [$ctx,#$Goff+$hi]
- ldr $t2, [$ctx,#$Hoff+$lo]
- ldr $t3, [$ctx,#$Hoff+$hi]
- adds $t0,$Alo,$t0
- str $t0, [$ctx,#$Goff+$lo]
- adc $t1,$Ahi,$t1
- str $t1, [$ctx,#$Goff+$hi]
- adds $t2,$Tlo,$t2
- str $t2, [$ctx,#$Hoff+$lo]
- adc $t3,$Thi,$t3
- str $t3, [$ctx,#$Hoff+$hi]
-
- add sp,sp,#640
- sub $Ktbl,$Ktbl,#640
-
- teq $inp,$len
- bne .Loop
-
- add sp,sp,#8*9 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- bx lr @ interoperable with Thumb ISA:-)
-#endif
-___
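
The LO/HI comments inside the .L16_79 loop above spell out how each 64-bit sigma rotate is decomposed into operations on 32-bit halves, since ARMv4 has no 64-bit general-purpose registers. A rough standalone Perl sketch of the sigma0 case (the sub is illustrative, not part of this generator):

    # sigma0(x) = ROTR(x,1) ^ ROTR(x,8) ^ (x>>7), with x carried as two
    # 32-bit words ($hi,$lo), exactly as the LO/HI comments describe
    sub sigma0_32x2 {
        my ($lo, $hi) = @_;
        my $m = 0xffffffff;
        my $rlo = (($lo>>1) ^ ($hi<<31) ^ ($lo>>8) ^ ($hi<<24)
                 ^ ($lo>>7) ^ ($hi<<25)) & $m;
        my $rhi = (($hi>>1) ^ ($lo<<31) ^ ($hi>>8) ^ ($lo<<24)
                 ^ ($hi>>7)) & $m;
        return ($rlo, $rhi);
    }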
-
-{
-my @Sigma0=(28,34,39);
-my @Sigma1=(14,18,41);
-my @sigma0=(1, 8, 7);
-my @sigma1=(19,61,6);
-
-my $Ktbl="r3";
-my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
-
-my @X=map("d$_",(0..15));
-my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
-
-sub NEON_00_15() {
-my $i=shift;
-my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
-my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps
-
-$code.=<<___ if ($i<16 || $i&1);
- vshr.u64 $t0,$e,#@Sigma1[0] @ $i
-#if $i<16
- vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
-#endif
- vshr.u64 $t1,$e,#@Sigma1[1]
- vshr.u64 $t2,$e,#@Sigma1[2]
-___
-$code.=<<___;
- vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
- vsli.64 $t0,$e,#`64-@Sigma1[0]`
- vsli.64 $t1,$e,#`64-@Sigma1[1]`
- vsli.64 $t2,$e,#`64-@Sigma1[2]`
-#if $i<16 && defined(__ARMEL__)
- vrev64.8 @X[$i],@X[$i]
-#endif
- vadd.i64 $T1,$K,$h
- veor $Ch,$f,$g
- veor $t0,$t1
- vand $Ch,$e
- veor $t0,$t2 @ Sigma1(e)
- veor $Ch,$g @ Ch(e,f,g)
- vadd.i64 $T1,$t0
- vshr.u64 $t0,$a,#@Sigma0[0]
- vadd.i64 $T1,$Ch
- vshr.u64 $t1,$a,#@Sigma0[1]
- vshr.u64 $t2,$a,#@Sigma0[2]
- vsli.64 $t0,$a,#`64-@Sigma0[0]`
- vsli.64 $t1,$a,#`64-@Sigma0[1]`
- vsli.64 $t2,$a,#`64-@Sigma0[2]`
- vadd.i64 $T1,@X[$i%16]
- vorr $Maj,$a,$c
- vand $Ch,$a,$c
- veor $h,$t0,$t1
- vand $Maj,$b
- veor $h,$t2 @ Sigma0(a)
- vorr $Maj,$Ch @ Maj(a,b,c)
- vadd.i64 $h,$T1
- vadd.i64 $d,$T1
- vadd.i64 $h,$Maj
-___
-}
-
-sub NEON_16_79() {
-my $i=shift;
-
-if ($i&1) { &NEON_00_15($i,@_); return; }
-
-# 2x-vectorized, therefore runs every 2nd round
-my @X=map("q$_",(0..7)); # view @X as 128-bit vector
-my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps
-my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15
-my $e=@_[4]; # $e from NEON_00_15
-$i /= 2;
-$code.=<<___;
- vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0]
- vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1]
- vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2]
- vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
- vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1]
- vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
- veor $s1,$t0
- vshr.u64 $t0,$s0,#@sigma0[0]
- veor $s1,$t1 @ sigma1(X[i+14])
- vshr.u64 $t1,$s0,#@sigma0[1]
- vadd.i64 @X[$i%8],$s1
- vshr.u64 $s1,$s0,#@sigma0[2]
- vsli.64 $t0,$s0,#`64-@sigma0[0]`
- vsli.64 $t1,$s0,#`64-@sigma0[1]`
- vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9]
- veor $s1,$t0
- vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15
- vadd.i64 @X[$i%8],$s0
- vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15
- veor $s1,$t1 @ sigma0(X[i+1])
- vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15
- vadd.i64 @X[$i%8],$s1
-___
- &NEON_00_15(2*$i,@_);
-}
-
-$code.=<<___;
-#if __ARM_ARCH__>=7
-.fpu neon
-
-.align 4
-.LNEON:
- dmb @ errata #451034 on early Cortex A8
- vstmdb sp!,{d8-d15} @ ABI specification says so
- sub $Ktbl,r3,#672 @ K512
- vldmia $ctx,{$A-$H} @ load context
-.Loop_neon:
-___
-for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- mov $cnt,#4
-.L16_79_neon:
- subs $cnt,#1
-___
-for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- bne .L16_79_neon
-
- vldmia $ctx,{d24-d31} @ load context to temp
- vadd.i64 q8,q12 @ vectorized accumulate
- vadd.i64 q9,q13
- vadd.i64 q10,q14
- vadd.i64 q11,q15
- vstmia $ctx,{$A-$H} @ save context
- teq $inp,$len
- sub $Ktbl,#640 @ rewind K512
- bne .Loop_neon
-
- vldmia sp!,{d8-d15} @ epilogue
- ret @ bx lr
-#endif
-___
-}
-$code.=<<___;
-.size sha512_block_data_order,.-sha512_block_data_order
-.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-$code =~ s/\bret\b/bx lr/gm;
-print $code;
-close STDOUT; # enforce flush
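
The two substitutions at the end of the generator are ordered deliberately: every "bx lr" already present in the output is first rewritten as its raw opcode so the module still assembles with -march=armv4, and only then is "ret" (used here in the __ARM_ARCH__>=7 NEON epilogue) expanded to "bx lr". The same transform in isolation:

    my $asm = "\tbx\tlr\n\tret\n";               # sample generator output
    $asm =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;  # ARMv4-safe raw encoding
    $asm =~ s/\bret\b/bx lr/gm;                  # NEON path keeps a real bx lr
    print $asm;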
diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.S b/app/openssl/crypto/sha/asm/sha512-armv8.S
deleted file mode 100644
index 6b0d1940..00000000
--- a/app/openssl/crypto/sha/asm/sha512-armv8.S
+++ /dev/null
@@ -1,1021 +0,0 @@
-#include "arm_arch.h"
-
-.text
-
-.globl sha512_block_data_order
-.type sha512_block_data_order,%function
-.align 6
-sha512_block_data_order:
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*8
-
- ldp x20,x21,[x0] // load context
- ldp x22,x23,[x0,#2*8]
- ldp x24,x25,[x0,#4*8]
- add x2,x1,x2,lsl#7 // end of input
- ldp x26,x27,[x0,#6*8]
- adr x30,K512
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp x3,x4,[x1],#2*8
- ldr x19,[x30],#8 // *K++
- eor x28,x21,x22 // magic seed
- str x1,[x29,#112]
-#ifndef __ARMEB__
- rev x3,x3 // 0
-#endif
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x6,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x3 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x4,x4 // 1
-#endif
- ldp x5,x6,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x7,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x4 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x5,x5 // 2
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x8,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x5 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x6,x6 // 3
-#endif
- ldp x7,x8,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x9,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x6 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x7,x7 // 4
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x10,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x7 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x10,ror#18 // Sigma1(e)
- ror x10,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x10,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x8,x8 // 5
-#endif
- ldp x9,x10,[x1],#2*8
- add x23,x23,x17 // h+=Sigma0(a)
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x11,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x8 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x11,ror#18 // Sigma1(e)
- ror x11,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x11,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x9,x9 // 6
-#endif
- add x22,x22,x17 // h+=Sigma0(a)
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x12,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x9 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x12,ror#18 // Sigma1(e)
- ror x12,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x12,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x10,x10 // 7
-#endif
- ldp x11,x12,[x1],#2*8
- add x21,x21,x17 // h+=Sigma0(a)
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- eor x13,x25,x25,ror#23
- and x17,x26,x25
- bic x28,x27,x25
- add x20,x20,x10 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x13,ror#18 // Sigma1(e)
- ror x13,x21,#28
- add x20,x20,x17 // h+=Ch(e,f,g)
- eor x17,x21,x21,ror#5
- add x20,x20,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x24,x24,x20 // d+=h
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x13,x17,ror#34 // Sigma0(a)
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x20,x20,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x11,x11 // 8
-#endif
- add x20,x20,x17 // h+=Sigma0(a)
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x14,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x11 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x14,ror#18 // Sigma1(e)
- ror x14,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x14,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x12,x12 // 9
-#endif
- ldp x13,x14,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x15,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x12 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x15,ror#18 // Sigma1(e)
- ror x15,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x15,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x13,x13 // 10
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x0,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x13 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x0,ror#18 // Sigma1(e)
- ror x0,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x0,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x14,x14 // 11
-#endif
- ldp x15,x0,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x6,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x14 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x15,x15 // 12
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x7,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x15 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x0,x0 // 13
-#endif
- ldp x1,x2,[x1]
- add x23,x23,x17 // h+=Sigma0(a)
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x8,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x0 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x1,x1 // 14
-#endif
- ldr x6,[sp,#24]
- add x22,x22,x17 // h+=Sigma0(a)
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x9,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x1 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __ARMEB__
- rev x2,x2 // 15
-#endif
- ldr x7,[sp,#0]
- add x21,x21,x17 // h+=Sigma0(a)
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
-.Loop_16_xx:
- ldr x8,[sp,#8]
- str x11,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x10,x5,#1
- and x17,x25,x24
- ror x9,x2,#19
- bic x19,x26,x24
- ror x11,x20,#28
- add x27,x27,x3 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x10,x10,x5,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x11,x11,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x9,x9,x2,ror#61
- eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x11,x20,ror#39 // Sigma0(a)
- eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
- add x4,x4,x13
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x4,x4,x10
- add x27,x27,x17 // h+=Sigma0(a)
- add x4,x4,x9
- ldr x9,[sp,#16]
- str x12,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x11,x6,#1
- and x17,x24,x23
- ror x10,x3,#19
- bic x28,x25,x23
- ror x12,x27,#28
- add x26,x26,x4 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x11,x11,x6,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x12,x12,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x10,x10,x3,ror#61
- eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x12,x27,ror#39 // Sigma0(a)
- eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
- add x5,x5,x14
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x5,x5,x11
- add x26,x26,x17 // h+=Sigma0(a)
- add x5,x5,x10
- ldr x10,[sp,#24]
- str x13,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x12,x7,#1
- and x17,x23,x22
- ror x11,x4,#19
- bic x19,x24,x22
- ror x13,x26,#28
- add x25,x25,x5 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x12,x12,x7,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x13,x13,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x11,x11,x4,ror#61
- eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x13,x26,ror#39 // Sigma0(a)
- eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
- add x6,x6,x15
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x6,x6,x12
- add x25,x25,x17 // h+=Sigma0(a)
- add x6,x6,x11
- ldr x11,[sp,#0]
- str x14,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x13,x8,#1
- and x17,x22,x21
- ror x12,x5,#19
- bic x28,x23,x21
- ror x14,x25,#28
- add x24,x24,x6 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x13,x13,x8,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x14,x14,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x12,x12,x5,ror#61
- eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x14,x25,ror#39 // Sigma0(a)
- eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
- add x7,x7,x0
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x7,x7,x13
- add x24,x24,x17 // h+=Sigma0(a)
- add x7,x7,x12
- ldr x12,[sp,#8]
- str x15,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x14,x9,#1
- and x17,x21,x20
- ror x13,x6,#19
- bic x19,x22,x20
- ror x15,x24,#28
- add x23,x23,x7 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x14,x14,x9,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x15,x15,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x13,x13,x6,ror#61
- eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x15,x24,ror#39 // Sigma0(a)
- eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
- add x8,x8,x1
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x8,x8,x14
- add x23,x23,x17 // h+=Sigma0(a)
- add x8,x8,x13
- ldr x13,[sp,#16]
- str x0,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x15,x10,#1
- and x17,x20,x27
- ror x14,x7,#19
- bic x28,x21,x27
- ror x0,x23,#28
- add x22,x22,x8 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x15,x15,x10,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x0,x0,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x14,x14,x7,ror#61
- eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x0,x23,ror#39 // Sigma0(a)
- eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
- add x9,x9,x2
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x9,x9,x15
- add x22,x22,x17 // h+=Sigma0(a)
- add x9,x9,x14
- ldr x14,[sp,#24]
- str x1,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x0,x11,#1
- and x17,x27,x26
- ror x15,x8,#19
- bic x19,x20,x26
- ror x1,x22,#28
- add x21,x21,x9 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x0,x0,x11,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x1,x1,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x15,x15,x8,ror#61
- eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x1,x22,ror#39 // Sigma0(a)
- eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
- add x10,x10,x3
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x10,x10,x0
- add x21,x21,x17 // h+=Sigma0(a)
- add x10,x10,x15
- ldr x15,[sp,#0]
- str x2,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x1,x12,#1
- and x17,x26,x25
- ror x0,x9,#19
- bic x28,x27,x25
- ror x2,x21,#28
- add x20,x20,x10 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x1,x1,x12,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x2,x2,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x0,x0,x9,ror#61
- eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x2,x21,ror#39 // Sigma0(a)
- eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
- add x11,x11,x4
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x11,x11,x1
- add x20,x20,x17 // h+=Sigma0(a)
- add x11,x11,x0
- ldr x0,[sp,#8]
- str x3,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x2,x13,#1
- and x17,x25,x24
- ror x1,x10,#19
- bic x19,x26,x24
- ror x3,x20,#28
- add x27,x27,x11 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x2,x2,x13,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x3,x3,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x1,x1,x10,ror#61
- eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x3,x20,ror#39 // Sigma0(a)
- eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
- add x12,x12,x5
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x12,x12,x2
- add x27,x27,x17 // h+=Sigma0(a)
- add x12,x12,x1
- ldr x1,[sp,#16]
- str x4,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x3,x14,#1
- and x17,x24,x23
- ror x2,x11,#19
- bic x28,x25,x23
- ror x4,x27,#28
- add x26,x26,x12 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x3,x3,x14,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x4,x4,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x2,x2,x11,ror#61
- eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x4,x27,ror#39 // Sigma0(a)
- eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
- add x13,x13,x6
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x13,x13,x3
- add x26,x26,x17 // h+=Sigma0(a)
- add x13,x13,x2
- ldr x2,[sp,#24]
- str x5,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x4,x15,#1
- and x17,x23,x22
- ror x3,x12,#19
- bic x19,x24,x22
- ror x5,x26,#28
- add x25,x25,x13 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x4,x4,x15,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x5,x5,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x3,x3,x12,ror#61
- eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x5,x26,ror#39 // Sigma0(a)
- eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
- add x14,x14,x7
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x14,x14,x4
- add x25,x25,x17 // h+=Sigma0(a)
- add x14,x14,x3
- ldr x3,[sp,#0]
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x5,x0,#1
- and x17,x22,x21
- ror x4,x13,#19
- bic x28,x23,x21
- ror x6,x25,#28
- add x24,x24,x14 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x5,x5,x0,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x6,x6,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x4,x4,x13,ror#61
- eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x25,ror#39 // Sigma0(a)
- eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
- add x15,x15,x8
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x15,x15,x5
- add x24,x24,x17 // h+=Sigma0(a)
- add x15,x15,x4
- ldr x4,[sp,#8]
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x6,x1,#1
- and x17,x21,x20
- ror x5,x14,#19
- bic x19,x22,x20
- ror x7,x24,#28
- add x23,x23,x15 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x6,x6,x1,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x7,x7,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x5,x5,x14,ror#61
- eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x24,ror#39 // Sigma0(a)
- eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
- add x0,x0,x9
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x0,x0,x6
- add x23,x23,x17 // h+=Sigma0(a)
- add x0,x0,x5
- ldr x5,[sp,#16]
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x7,x2,#1
- and x17,x20,x27
- ror x6,x15,#19
- bic x28,x21,x27
- ror x8,x23,#28
- add x22,x22,x0 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x7,x7,x2,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x8,x8,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x6,x6,x15,ror#61
- eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x23,ror#39 // Sigma0(a)
- eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
- add x1,x1,x10
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x1,x1,x7
- add x22,x22,x17 // h+=Sigma0(a)
- add x1,x1,x6
- ldr x6,[sp,#24]
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x8,x3,#1
- and x17,x27,x26
- ror x7,x0,#19
- bic x19,x20,x26
- ror x9,x22,#28
- add x21,x21,x1 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x8,x8,x3,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x9,x9,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x7,x7,x0,ror#61
- eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x22,ror#39 // Sigma0(a)
- eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
- add x2,x2,x11
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x2,x2,x8
- add x21,x21,x17 // h+=Sigma0(a)
- add x2,x2,x7
- ldr x7,[sp,#0]
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
- cbnz x19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#648 // rewind
-
- ldp x3,x4,[x0]
- ldp x5,x6,[x0,#2*8]
- add x1,x1,#14*8 // advance input pointer
- ldp x7,x8,[x0,#4*8]
- add x20,x20,x3
- ldp x9,x10,[x0,#6*8]
- add x21,x21,x4
- add x22,x22,x5
- add x23,x23,x6
- stp x20,x21,[x0]
- add x24,x24,x7
- add x25,x25,x8
- stp x22,x23,[x0,#2*8]
- add x26,x26,x9
- add x27,x27,x10
- cmp x1,x2
- stp x24,x25,[x0,#4*8]
- stp x26,x27,[x0,#6*8]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*8
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha512_block_data_order,.-sha512_block_data_order
-
-.align 6
-.type K512,%object
-K512:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
- .quad 0 // terminator
-.size K512,.-K512
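
The zero quad terminating K512 doubles as the loop-exit condition: each round prefetches *K++ for the next one, and .Loop_16_xx ends with "cbnz x19", so the code falls out once that prefetch returns the sentinel and no separate round counter is needed. The same idea in a few lines of Perl (64-bit Perl assumed):

    my @K = (0x428a2f98d728ae22, 0x7137449123ef65cd, 0x0); # abridged + sentinel
    my $i = 0;
    while (my $k = $K[$i++]) {
        # one round using constant $k; the loop stops at the zero quad
    }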
-.align 3
-.LOPENSSL_armcap_P:
- .quad OPENSSL_armcap_P-.
-.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-.comm OPENSSL_armcap_P,4,4
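
Throughout the rounds above, Ch and Maj are computed without any NOT or extra AND: Ch(e,f,g) = (e&f)|(~e&g) via and/bic/orr (the two terms are bit-disjoint, so orr equals the textbook xor), while Maj(a,b,c) is folded into the "magic seed" recurrence ((a^b)&(b^c))^b, with a^b computed one round early in the alternating x19/x28 registers. A quick Perl self-check of that Maj identity:

    for (1..1000) {
        my ($a, $b, $c) = map { int(rand(2**32)) } 1..3;
        my $maj = ($a & $b) ^ ($a & $c) ^ ($b & $c);   # textbook Maj
        die "mismatch" if ((($a ^ $b) & ($b ^ $c)) ^ $b) != $maj;
    }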
diff --git a/app/openssl/crypto/sha/asm/sha512-armv8.pl b/app/openssl/crypto/sha/asm/sha512-armv8.pl
deleted file mode 100644
index 6935ed65..00000000
--- a/app/openssl/crypto/sha/asm/sha512-armv8.pl
+++ /dev/null
@@ -1,414 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# SHA256/512 for ARMv8.
-#
-# Performance in cycles per processed byte and improvement coefficient
-# over code generated with "default" compiler:
-#
-# SHA256-hw SHA256(*) SHA512
-# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
-# Cortex-A5x n/a n/a n/a
-#
-# (*) Software SHA256 results are of lesser relevance, presented
-# mostly for informational purposes.
-# (**) The result is a trade-off: it's possible to improve it by
-# 10%, but at the cost of 20% loss on Cortex-A5x.
-
-$flavour=shift;
-$output=shift;
-open STDOUT,">$output";
-
-if ($output =~ /512/) {
- $BITS=512;
- $SZ=8;
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=(1, 8, 7);
- @sigma1=(19,61, 6);
- $rounds=80;
- $reg_t="x";
-} else {
- $BITS=256;
- $SZ=4;
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 7,18, 3);
- @sigma1=(17,19,10);
- $rounds=64;
- $reg_t="w";
-}
-
-$func="sha${BITS}_block_data_order";
-
-($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
-
-@X=map("$reg_t$_",(3..15,0..2));
-@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
-($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
-
-sub BODY_00_xx {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-my $j=($i+1)&15;
-my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
- $T0=@X[$i+3] if ($i<11);
-
-$code.=<<___ if ($i<16);
-#ifndef __ARMEB__
- rev @X[$i],@X[$i] // $i
-#endif
-___
-$code.=<<___ if ($i<13 && ($i&1));
- ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ
-___
-$code.=<<___ if ($i==13);
- ldp @X[14],@X[15],[$inp]
-___
-$code.=<<___ if ($i>=14);
- ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
-___
-$code.=<<___ if ($i>0 && $i<16);
- add $a,$a,$t1 // h+=Sigma0(a)
-___
-$code.=<<___ if ($i>=11);
- str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
-___
-# While ARMv8 specifies a merged rotate-n-logical operation such as
-# 'eor x,y,z,ror#n', it was found to negatively affect performance
-# on Apple A7. The reason seems to be that it requires even 'y' to
-# be available earlier. This means that such a merged instruction is
-# not necessarily the best choice on the critical path... On the other hand
-# Cortex-A5x handles merged instructions much better than disjoint
-# rotate and logical... See (**) footnote above.
-$code.=<<___ if ($i<15);
- ror $t0,$e,#$Sigma1[0]
- add $h,$h,$t2 // h+=K[i]
- eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
- and $t1,$f,$e
- bic $t2,$g,$e
- add $h,$h,@X[$i&15] // h+=X[i]
- orr $t1,$t1,$t2 // Ch(e,f,g)
- eor $t2,$a,$b // a^b, b^c in next round
- eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e)
- ror $T0,$a,#$Sigma0[0]
- add $h,$h,$t1 // h+=Ch(e,f,g)
- eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
- add $h,$h,$t0 // h+=Sigma1(e)
- and $t3,$t3,$t2 // (b^c)&=(a^b)
- add $d,$d,$h // d+=h
- eor $t3,$t3,$b // Maj(a,b,c)
- eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a)
- add $h,$h,$t3 // h+=Maj(a,b,c)
- ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
- //add $h,$h,$t1 // h+=Sigma0(a)
-___
-$code.=<<___ if ($i>=15);
- ror $t0,$e,#$Sigma1[0]
- add $h,$h,$t2 // h+=K[i]
- ror $T1,@X[($j+1)&15],#$sigma0[0]
- and $t1,$f,$e
- ror $T2,@X[($j+14)&15],#$sigma1[0]
- bic $t2,$g,$e
- ror $T0,$a,#$Sigma0[0]
- add $h,$h,@X[$i&15] // h+=X[i]
- eor $t0,$t0,$e,ror#$Sigma1[1]
- eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
- orr $t1,$t1,$t2 // Ch(e,f,g)
- eor $t2,$a,$b // a^b, b^c in next round
- eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e)
- eor $T0,$T0,$a,ror#$Sigma0[1]
- add $h,$h,$t1 // h+=Ch(e,f,g)
- and $t3,$t3,$t2 // (b^c)&=(a^b)
- eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
- eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1])
- add $h,$h,$t0 // h+=Sigma1(e)
- eor $t3,$t3,$b // Maj(a,b,c)
- eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a)
- eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14])
- add @X[$j],@X[$j],@X[($j+9)&15]
- add $d,$d,$h // d+=h
- add $h,$h,$t3 // h+=Maj(a,b,c)
- ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
- add @X[$j],@X[$j],$T1
- add $h,$h,$t1 // h+=Sigma0(a)
- add @X[$j],@X[$j],$T2
-___
- ($t2,$t3)=($t3,$t2);
-}
-
-$code.=<<___;
-#include "arm_arch.h"
-
-.text
-
-.globl $func
-.type $func,%function
-.align 6
-$func:
-___
-$code.=<<___ if ($SZ==4);
- ldr x16,.LOPENSSL_armcap_P
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA256
- b.ne .Lv8_entry
-___
-$code.=<<___;
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*$SZ
-
- ldp $A,$B,[$ctx] // load context
- ldp $C,$D,[$ctx,#2*$SZ]
- ldp $E,$F,[$ctx,#4*$SZ]
- add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
- ldp $G,$H,[$ctx,#6*$SZ]
- adr $Ktbl,K$BITS
- stp $ctx,$num,[x29,#96]
-
-.Loop:
- ldp @X[0],@X[1],[$inp],#2*$SZ
- ldr $t2,[$Ktbl],#$SZ // *K++
- eor $t3,$B,$C // magic seed
- str $inp,[x29,#112]
-___
-for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
-$code.=".Loop_16_xx:\n";
-for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
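
Instead of emitting eight differently-named round bodies, the loops above rotate the working-variable tuple after each round so that BODY_00_xx can always refer to $a..$h; the NEON code in sha512-armv4.pl relies on the same unshift(@V,pop(@V)) idiom. In isolation:

    my @V = qw(a b c d e f g h);
    unshift(@V, pop(@V));   # the register holding h becomes next round's a
    # @V is now (h a b c d e f g)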
-$code.=<<___;
- cbnz $t2,.Loop_16_xx
-
- ldp $ctx,$num,[x29,#96]
- ldr $inp,[x29,#112]
- sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind
-
- ldp @X[0],@X[1],[$ctx]
- ldp @X[2],@X[3],[$ctx,#2*$SZ]
- add $inp,$inp,#14*$SZ // advance input pointer
- ldp @X[4],@X[5],[$ctx,#4*$SZ]
- add $A,$A,@X[0]
- ldp @X[6],@X[7],[$ctx,#6*$SZ]
- add $B,$B,@X[1]
- add $C,$C,@X[2]
- add $D,$D,@X[3]
- stp $A,$B,[$ctx]
- add $E,$E,@X[4]
- add $F,$F,@X[5]
- stp $C,$D,[$ctx,#2*$SZ]
- add $G,$G,@X[6]
- add $H,$H,@X[7]
- cmp $inp,$num
- stp $E,$F,[$ctx,#4*$SZ]
- stp $G,$H,[$ctx,#6*$SZ]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*$SZ
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size $func,.-$func
-
-.align 6
-.type K$BITS,%object
-K$BITS:
-___
-$code.=<<___ if ($SZ==8);
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
- .quad 0 // terminator
-___
-$code.=<<___ if ($SZ==4);
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
- .long 0 //terminator
-___
-$code.=<<___;
-.size K$BITS,.-K$BITS
-.align 3
-.LOPENSSL_armcap_P:
- .quad OPENSSL_armcap_P-.
-.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-
-if ($SZ==4) {
-my $Ktbl="x3";
-
-my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
-my @MSG=map("v$_.16b",(4..7));
-my ($W0,$W1)=("v16.4s","v17.4s");
-my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
-
-$code.=<<___;
-.type sha256_block_armv8,%function
-.align 6
-sha256_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- ld1.32 {$ABCD,$EFGH},[$ctx]
- adr $Ktbl,K256
-
-.Loop_hw:
- ld1 {@MSG[0]-@MSG[3]},[$inp],#64
- sub $num,$num,#1
- ld1.32 {$W0},[$Ktbl],#16
- rev32 @MSG[0],@MSG[0]
- rev32 @MSG[1],@MSG[1]
- rev32 @MSG[2],@MSG[2]
- rev32 @MSG[3],@MSG[3]
- orr $ABCD_SAVE,$ABCD,$ABCD // offload
- orr $EFGH_SAVE,$EFGH,$EFGH
-___
-for($i=0;$i<12;$i++) {
-$code.=<<___;
- ld1.32 {$W1},[$Ktbl],#16
- add.i32 $W0,$W0,@MSG[0]
- sha256su0 @MSG[0],@MSG[1]
- orr $abcd,$ABCD,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
- sha256su1 @MSG[0],@MSG[2],@MSG[3]
-___
- ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
-}
-$code.=<<___;
- ld1.32 {$W1},[$Ktbl],#16
- add.i32 $W0,$W0,@MSG[0]
- orr $abcd,$ABCD,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
-
- ld1.32 {$W0},[$Ktbl],#16
- add.i32 $W1,$W1,@MSG[1]
- orr $abcd,$ABCD,$ABCD
- sha256h $ABCD,$EFGH,$W1
- sha256h2 $EFGH,$abcd,$W1
-
- ld1.32 {$W1},[$Ktbl]
- add.i32 $W0,$W0,@MSG[2]
- sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind
- orr $abcd,$ABCD,$ABCD
- sha256h $ABCD,$EFGH,$W0
- sha256h2 $EFGH,$abcd,$W0
-
- add.i32 $W1,$W1,@MSG[3]
- orr $abcd,$ABCD,$ABCD
- sha256h $ABCD,$EFGH,$W1
- sha256h2 $EFGH,$abcd,$W1
-
- add.i32 $ABCD,$ABCD,$ABCD_SAVE
- add.i32 $EFGH,$EFGH,$EFGH_SAVE
-
- cbnz $num,.Loop_hw
-
- st1.32 {$ABCD,$EFGH},[$ctx]
-
- ldr x29,[sp],#16
- ret
-.size sha256_block_armv8,.-sha256_block_armv8
-___
-}
-
-$code.=<<___;
-.comm OPENSSL_armcap_P,4,4
-___
-
-{ my %opcode = (
- "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000,
- "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 );
-
- sub unsha256 {
- my ($mnemonic,$arg)=@_;
-
- $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
- &&
- sprintf ".inst\t0x%08x\t//%s %s",
- $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
- $mnemonic,$arg;
- }
-}
-
-foreach(split("\n",$code)) {
-
- s/\`([^\`]*)\`/eval($1)/geo;
-
- s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
-
- s/\.\w?32\b//o and s/\.16b/\.4s/go;
- m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
-
- print $_,"\n";
-}
-
-close STDOUT;
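
The unsha256 helper above rewrites each sha256h/sha256h2/sha256su0/sha256su1 mnemonic as a raw .inst word, presumably so the module assembles even where the toolchain does not yet know the ARMv8 crypto extensions; the register numbers are packed into the base opcode as Rd | (Rn<<5) | (Rm<<16). For example:

    # "sha256h v0,v1,v16" encoded by hand with the base opcode from the table
    printf ".inst\t0x%08x\n", 0x5e004000 | 0 | (1<<5) | (16<<16);
    # prints ".inst  0x5e104020"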
diff --git a/app/openssl/crypto/sha/asm/sha512-ia64.pl b/app/openssl/crypto/sha/asm/sha512-ia64.pl
deleted file mode 100755
index 1c6ce565..00000000
--- a/app/openssl/crypto/sha/asm/sha512-ia64.pl
+++ /dev/null
@@ -1,672 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# SHA256/512_Transform for Itanium.
-#
-# sha512_block runs in 1003 cycles on Itanium 2, which is almost 50%
-# faster than gcc and >60%(!) faster than code generated by the HP-UX
-# compiler (yes, HP-UX generates slower code because, unlike gcc, it
-# fails to deploy the "shift right pair" instruction, 'shrp', which
-# substitutes for a 64-bit rotate).
-#
-# The 924-cycle sha256_block outperforms gcc by over a factor of 2(!)
-# and the HP-UX compiler by >40% (yes, gcc won sha512_block, but lost
-# this one big time). Note that "formally" 924 is about 100 cycles
-# too much. I mean it's 64 32-bit rounds vs. 80 virtually identical
-# 64-bit ones, and 1003*64/80 gives 802. The extra cycles, 2 per round,
-# are spent on extra work to provide for 32-bit rotations. 32-bit
-# rotations are still handled by the 'shrp' instruction, and for this
-# reason the lower 32 bits are deposited into the upper half of a
-# 64-bit register prior to 'shrp' issue. And in order to minimize such
-# operations, X[16] values are *maintained* with copies of lower
-# halves in upper halves, which is why you'll spot such instructions
-# as custom 'mux2', "parallel 32-bit add," 'padd4' and "parallel
-# 32-bit unsigned right shift," 'pshr4.u' instructions here.
-#
-# Rules of engagement.
-#
-# There is only one integer shifter, meaning that if I have two rotate,
-# deposit or extract instructions in adjacent bundles, they shall
-# split [at run-time if they have to]. But note that variable and
-# parallel shifts are performed by multi-media ALU and *are* pairable
-# with rotates [and alike]. On the backside MMALU is rather slow: it
-# takes 2 extra cycles before the result of integer operation is
-# available *to* MMALU and 2(*) extra cycles before the result of MM
-# operation is available "back" *to* integer ALU, not to mention that
-# MMALU itself has 2 cycles latency. However! I explicitly scheduled
-# these MM instructions to avoid MM stalls, so that all these extra
-# latencies get "hidden" in instruction-level parallelism.
-#
-# (*) 2 cycles on Itanium 1 and 1 cycle on Itanium 2. But I schedule
-# for 2 in order to provide for best *overall* performance,
-# because on Itanium 1 stall on MM result is accompanied by
-# pipeline flush, which takes 6 cycles:-(
-#
-# Resulting performance numbers for 900MHz Itanium 2 system:
-#
-# The 'numbers' are in 1000s of bytes per second processed.
-# type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
-# sha1(*) 6210.14k 20376.30k 52447.83k 85870.05k 105478.12k
-# sha256 7476.45k 20572.05k 41538.34k 56062.29k 62093.18k
-# sha512 4996.56k 20026.28k 47597.20k 85278.79k 111501.31k
-#
-# (*) SHA1 numbers are for the HP-UX compiler and are presented purely
-# for reference purposes. I bet it can be improved too...
-#
-# To generate code, pass the file name with either 256 or 512 in its
-# name and compiler flags.
-
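
The mux2/'shrp' trick described above reduces a 32-bit rotate to a 64-bit funnel shift: keep a copy of the low 32 bits in the upper half of the register, and a single 64-bit right shift then leaves the rotated value in the lower half. A rough Perl model (64-bit integers assumed; the sub is illustrative):

    sub rotr32 {
        my ($x, $n) = @_;                 # 0 < $n < 32
        my $d = ($x << 32) | $x;          # mux2 x,0x44: replicate low half
        return ($d >> $n) & 0xffffffff;   # shrp: "shift right pair"
    }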
-$output=shift;
-
-if ($output =~ /512.*\.[s|asm]/) {
- $SZ=8;
- $BITS=8*$SZ;
- $LDW="ld8";
- $STW="st8";
- $ADD="add";
- $SHRU="shr.u";
- $TABLE="K512";
- $func="sha512_block_data_order";
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=(1, 8, 7);
- @sigma1=(19,61, 6);
- $rounds=80;
-} elsif ($output =~ /256.*\.[s|asm]/) {
- $SZ=4;
- $BITS=8*$SZ;
- $LDW="ld4";
- $STW="st4";
- $ADD="padd4";
- $SHRU="pshr4.u";
- $TABLE="K256";
- $func="sha256_block_data_order";
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 7,18, 3);
- @sigma1=(17,19,10);
- $rounds=64;
-} else { die "nonsense $output"; }
-
-open STDOUT,">$output" || die "can't open $output: $!";
-
-if ($^O eq "hpux") {
- $ADDP="addp4";
- for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
-} else { $ADDP="add"; }
-for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/);
- $big_endian=0 if (/\-DL_ENDIAN/); }
-if (!defined($big_endian))
- { $big_endian=(unpack('L',pack('N',1))==1); }
-
-$code=<<___;
-.ident \"$output, version 1.1\"
-.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
-.explicit
-.text
-
-pfssave=r2;
-lcsave=r3;
-prsave=r14;
-K=r15;
-A=r16; B=r17; C=r18; D=r19;
-E=r20; F=r21; G=r22; H=r23;
-T1=r24; T2=r25;
-s0=r26; s1=r27; t0=r28; t1=r29;
-Ktbl=r30;
-ctx=r31; // 1st arg
-input=r48; // 2nd arg
-num=r49; // 3rd arg
-sgm0=r50; sgm1=r51; // small constants
-A_=r54; B_=r55; C_=r56; D_=r57;
-E_=r58; F_=r59; G_=r60; H_=r61;
-
-// void $func (SHA_CTX *ctx, const void *in,size_t num[,int host])
-.global $func#
-.proc $func#
-.align 32
-$func:
- .prologue
- .save ar.pfs,pfssave
-{ .mmi; alloc pfssave=ar.pfs,3,27,0,16
- $ADDP ctx=0,r32 // 1st arg
- .save ar.lc,lcsave
- mov lcsave=ar.lc }
-{ .mmi; $ADDP input=0,r33 // 2nd arg
- mov num=r34 // 3rd arg
- .save pr,prsave
- mov prsave=pr };;
-
- .body
-{ .mib; add r8=0*$SZ,ctx
- add r9=1*$SZ,ctx
- brp.loop.imp .L_first16,.L_first16_end-16 }
-{ .mib; add r10=2*$SZ,ctx
- add r11=3*$SZ,ctx
- brp.loop.imp .L_rest,.L_rest_end-16 };;
-
-// load A-H
-.Lpic_point:
-{ .mmi; $LDW A_=[r8],4*$SZ
- $LDW B_=[r9],4*$SZ
- mov Ktbl=ip }
-{ .mmi; $LDW C_=[r10],4*$SZ
- $LDW D_=[r11],4*$SZ
- mov sgm0=$sigma0[2] };;
-{ .mmi; $LDW E_=[r8]
- $LDW F_=[r9]
- add Ktbl=($TABLE#-.Lpic_point),Ktbl }
-{ .mmi; $LDW G_=[r10]
- $LDW H_=[r11]
- cmp.ne p0,p16=0,r0 };; // used in sha256_block
-___
-$code.=<<___ if ($BITS==64);
-{ .mii; and r8=7,input
- and input=~7,input;;
- cmp.eq p9,p0=1,r8 }
-{ .mmi; cmp.eq p10,p0=2,r8
- cmp.eq p11,p0=3,r8
- cmp.eq p12,p0=4,r8 }
-{ .mmi; cmp.eq p13,p0=5,r8
- cmp.eq p14,p0=6,r8
- cmp.eq p15,p0=7,r8 };;
-___
-$code.=<<___;
-.L_outer:
-.rotr X[16]
-{ .mmi; mov A=A_
- mov B=B_
- mov ar.lc=14 }
-{ .mmi; mov C=C_
- mov D=D_
- mov E=E_ }
-{ .mmi; mov F=F_
- mov G=G_
- mov ar.ec=2 }
-{ .mmi; ld1 X[15]=[input],$SZ // eliminated in 64-bit
- mov H=H_
- mov sgm1=$sigma1[2] };;
-
-___
-$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
-.align 32
-.L_first16:
-{ .mmi; add r9=1-$SZ,input
- add r10=2-$SZ,input
- add r11=3-$SZ,input };;
-{ .mmi; ld1 r9=[r9]
- ld1 r10=[r10]
- dep.z $t1=E,32,32 }
-{ .mmi; $LDW K=[Ktbl],$SZ
- ld1 r11=[r11]
- zxt4 E=E };;
-{ .mii; or $t1=$t1,E
- dep X[15]=X[15],r9,8,8
- dep r11=r10,r11,8,8 };;
-{ .mmi; and T1=F,E
- and T2=A,B
- dep X[15]=X[15],r11,16,16 }
-{ .mmi; andcm r8=G,E
- and r9=A,C
- mux2 $t0=A,0x44 };; // copy lower half to upper
-{ .mmi; (p16) ld1 X[15-1]=[input],$SZ // prefetch
- xor T1=T1,r8 // T1=((e & f) ^ (~e & g))
- _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14)
-{ .mib; and r10=B,C
- xor T2=T2,r9 };;
-___
-$t0="A", $t1="E", $code.=<<___ if ($BITS==64);
-// In 64-bit mode I load the whole X[16] at once and take care of alignment...
-{ .mmi; add r8=1*$SZ,input
- add r9=2*$SZ,input
- add r10=3*$SZ,input };;
-{ .mmb; $LDW X[15]=[input],4*$SZ
- $LDW X[14]=[r8],4*$SZ
-(p9) br.cond.dpnt.many .L1byte };;
-{ .mmb; $LDW X[13]=[r9],4*$SZ
- $LDW X[12]=[r10],4*$SZ
-(p10) br.cond.dpnt.many .L2byte };;
-{ .mmb; $LDW X[11]=[input],4*$SZ
- $LDW X[10]=[r8],4*$SZ
-(p11) br.cond.dpnt.many .L3byte };;
-{ .mmb; $LDW X[ 9]=[r9],4*$SZ
- $LDW X[ 8]=[r10],4*$SZ
-(p12) br.cond.dpnt.many .L4byte };;
-{ .mmb; $LDW X[ 7]=[input],4*$SZ
- $LDW X[ 6]=[r8],4*$SZ
-(p13) br.cond.dpnt.many .L5byte };;
-{ .mmb; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
-(p14) br.cond.dpnt.many .L6byte };;
-{ .mmb; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
-(p15) br.cond.dpnt.many .L7byte };;
-{ .mmb; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- br.many .L_first16 };;
-.L1byte:
-{ .mmi; $LDW X[13]=[r9],4*$SZ
- $LDW X[12]=[r10],4*$SZ
- shrp X[15]=X[15],X[14],56 };;
-{ .mmi; $LDW X[11]=[input],4*$SZ
- $LDW X[10]=[r8],4*$SZ
- shrp X[14]=X[14],X[13],56 }
-{ .mmi; $LDW X[ 9]=[r9],4*$SZ
- $LDW X[ 8]=[r10],4*$SZ
- shrp X[13]=X[13],X[12],56 };;
-{ .mmi; $LDW X[ 7]=[input],4*$SZ
- $LDW X[ 6]=[r8],4*$SZ
- shrp X[12]=X[12],X[11],56 }
-{ .mmi; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
- shrp X[11]=X[11],X[10],56 };;
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[10]=X[10],X[ 9],56 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[ 9]=X[ 9],X[ 8],56 };;
-{ .mii; $LDW T1=[input]
- shrp X[ 8]=X[ 8],X[ 7],56
- shrp X[ 7]=X[ 7],X[ 6],56 }
-{ .mii; shrp X[ 6]=X[ 6],X[ 5],56
- shrp X[ 5]=X[ 5],X[ 4],56 };;
-{ .mii; shrp X[ 4]=X[ 4],X[ 3],56
- shrp X[ 3]=X[ 3],X[ 2],56 }
-{ .mii; shrp X[ 2]=X[ 2],X[ 1],56
- shrp X[ 1]=X[ 1],X[ 0],56 }
-{ .mib; shrp X[ 0]=X[ 0],T1,56
- br.many .L_first16 };;
-.L2byte:
-{ .mmi; $LDW X[11]=[input],4*$SZ
- $LDW X[10]=[r8],4*$SZ
- shrp X[15]=X[15],X[14],48 }
-{ .mmi; $LDW X[ 9]=[r9],4*$SZ
- $LDW X[ 8]=[r10],4*$SZ
- shrp X[14]=X[14],X[13],48 };;
-{ .mmi; $LDW X[ 7]=[input],4*$SZ
- $LDW X[ 6]=[r8],4*$SZ
- shrp X[13]=X[13],X[12],48 }
-{ .mmi; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
- shrp X[12]=X[12],X[11],48 };;
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[11]=X[11],X[10],48 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[10]=X[10],X[ 9],48 };;
-{ .mii; $LDW T1=[input]
- shrp X[ 9]=X[ 9],X[ 8],48
- shrp X[ 8]=X[ 8],X[ 7],48 }
-{ .mii; shrp X[ 7]=X[ 7],X[ 6],48
- shrp X[ 6]=X[ 6],X[ 5],48 };;
-{ .mii; shrp X[ 5]=X[ 5],X[ 4],48
- shrp X[ 4]=X[ 4],X[ 3],48 }
-{ .mii; shrp X[ 3]=X[ 3],X[ 2],48
- shrp X[ 2]=X[ 2],X[ 1],48 }
-{ .mii; shrp X[ 1]=X[ 1],X[ 0],48
- shrp X[ 0]=X[ 0],T1,48 }
-{ .mfb; br.many .L_first16 };;
-.L3byte:
-{ .mmi; $LDW X[ 9]=[r9],4*$SZ
- $LDW X[ 8]=[r10],4*$SZ
- shrp X[15]=X[15],X[14],40 };;
-{ .mmi; $LDW X[ 7]=[input],4*$SZ
- $LDW X[ 6]=[r8],4*$SZ
- shrp X[14]=X[14],X[13],40 }
-{ .mmi; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
- shrp X[13]=X[13],X[12],40 };;
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[12]=X[12],X[11],40 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[11]=X[11],X[10],40 };;
-{ .mii; $LDW T1=[input]
- shrp X[10]=X[10],X[ 9],40
- shrp X[ 9]=X[ 9],X[ 8],40 }
-{ .mii; shrp X[ 8]=X[ 8],X[ 7],40
- shrp X[ 7]=X[ 7],X[ 6],40 };;
-{ .mii; shrp X[ 6]=X[ 6],X[ 5],40
- shrp X[ 5]=X[ 5],X[ 4],40 }
-{ .mii; shrp X[ 4]=X[ 4],X[ 3],40
- shrp X[ 3]=X[ 3],X[ 2],40 }
-{ .mii; shrp X[ 2]=X[ 2],X[ 1],40
- shrp X[ 1]=X[ 1],X[ 0],40 }
-{ .mib; shrp X[ 0]=X[ 0],T1,40
- br.many .L_first16 };;
-.L4byte:
-{ .mmi; $LDW X[ 7]=[input],4*$SZ
- $LDW X[ 6]=[r8],4*$SZ
- shrp X[15]=X[15],X[14],32 }
-{ .mmi; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
- shrp X[14]=X[14],X[13],32 };;
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[13]=X[13],X[12],32 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[12]=X[12],X[11],32 };;
-{ .mii; $LDW T1=[input]
- shrp X[11]=X[11],X[10],32
- shrp X[10]=X[10],X[ 9],32 }
-{ .mii; shrp X[ 9]=X[ 9],X[ 8],32
- shrp X[ 8]=X[ 8],X[ 7],32 };;
-{ .mii; shrp X[ 7]=X[ 7],X[ 6],32
- shrp X[ 6]=X[ 6],X[ 5],32 }
-{ .mii; shrp X[ 5]=X[ 5],X[ 4],32
- shrp X[ 4]=X[ 4],X[ 3],32 }
-{ .mii; shrp X[ 3]=X[ 3],X[ 2],32
- shrp X[ 2]=X[ 2],X[ 1],32 }
-{ .mii; shrp X[ 1]=X[ 1],X[ 0],32
- shrp X[ 0]=X[ 0],T1,32 }
-{ .mfb; br.many .L_first16 };;
-.L5byte:
-{ .mmi; $LDW X[ 5]=[r9],4*$SZ
- $LDW X[ 4]=[r10],4*$SZ
- shrp X[15]=X[15],X[14],24 };;
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[14]=X[14],X[13],24 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[13]=X[13],X[12],24 };;
-{ .mii; $LDW T1=[input]
- shrp X[12]=X[12],X[11],24
- shrp X[11]=X[11],X[10],24 }
-{ .mii; shrp X[10]=X[10],X[ 9],24
- shrp X[ 9]=X[ 9],X[ 8],24 };;
-{ .mii; shrp X[ 8]=X[ 8],X[ 7],24
- shrp X[ 7]=X[ 7],X[ 6],24 }
-{ .mii; shrp X[ 6]=X[ 6],X[ 5],24
- shrp X[ 5]=X[ 5],X[ 4],24 }
-{ .mii; shrp X[ 4]=X[ 4],X[ 3],24
- shrp X[ 3]=X[ 3],X[ 2],24 }
-{ .mii; shrp X[ 2]=X[ 2],X[ 1],24
- shrp X[ 1]=X[ 1],X[ 0],24 }
-{ .mib; shrp X[ 0]=X[ 0],T1,24
- br.many .L_first16 };;
-.L6byte:
-{ .mmi; $LDW X[ 3]=[input],4*$SZ
- $LDW X[ 2]=[r8],4*$SZ
- shrp X[15]=X[15],X[14],16 }
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[14]=X[14],X[13],16 };;
-{ .mii; $LDW T1=[input]
- shrp X[13]=X[13],X[12],16
- shrp X[12]=X[12],X[11],16 }
-{ .mii; shrp X[11]=X[11],X[10],16
- shrp X[10]=X[10],X[ 9],16 };;
-{ .mii; shrp X[ 9]=X[ 9],X[ 8],16
- shrp X[ 8]=X[ 8],X[ 7],16 }
-{ .mii; shrp X[ 7]=X[ 7],X[ 6],16
- shrp X[ 6]=X[ 6],X[ 5],16 }
-{ .mii; shrp X[ 5]=X[ 5],X[ 4],16
- shrp X[ 4]=X[ 4],X[ 3],16 }
-{ .mii; shrp X[ 3]=X[ 3],X[ 2],16
- shrp X[ 2]=X[ 2],X[ 1],16 }
-{ .mii; shrp X[ 1]=X[ 1],X[ 0],16
- shrp X[ 0]=X[ 0],T1,16 }
-{ .mfb; br.many .L_first16 };;
-.L7byte:
-{ .mmi; $LDW X[ 1]=[r9],4*$SZ
- $LDW X[ 0]=[r10],4*$SZ
- shrp X[15]=X[15],X[14],8 };;
-{ .mii; $LDW T1=[input]
- shrp X[14]=X[14],X[13],8
- shrp X[13]=X[13],X[12],8 }
-{ .mii; shrp X[12]=X[12],X[11],8
- shrp X[11]=X[11],X[10],8 };;
-{ .mii; shrp X[10]=X[10],X[ 9],8
- shrp X[ 9]=X[ 9],X[ 8],8 }
-{ .mii; shrp X[ 8]=X[ 8],X[ 7],8
- shrp X[ 7]=X[ 7],X[ 6],8 }
-{ .mii; shrp X[ 6]=X[ 6],X[ 5],8
- shrp X[ 5]=X[ 5],X[ 4],8 }
-{ .mii; shrp X[ 4]=X[ 4],X[ 3],8
- shrp X[ 3]=X[ 3],X[ 2],8 }
-{ .mii; shrp X[ 2]=X[ 2],X[ 1],8
- shrp X[ 1]=X[ 1],X[ 0],8 }
-{ .mib; shrp X[ 0]=X[ 0],T1,8
- br.many .L_first16 };;
-
-.align 32
-.L_first16:
-{ .mmi; $LDW K=[Ktbl],$SZ
- and T1=F,E
- and T2=A,B }
-{ .mmi; //$LDW X[15]=[input],$SZ // X[i]=*input++
- andcm r8=G,E
- and r9=A,C };;
-{ .mmi; xor T1=T1,r8 //T1=((e & f) ^ (~e & g))
- and r10=B,C
- _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14)
-{ .mmi; xor T2=T2,r9
- mux1 X[15]=X[15],\@rev };; // eliminated in big-endian
-___
-$code.=<<___;
-{ .mib; add T1=T1,H // T1=Ch(e,f,g)+h
- _rotr r8=$t1,$Sigma1[1] } // ROTR(e,18)
-{ .mib; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c))
- mov H=G };;
-{ .mib; xor r11=r8,r11
- _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41)
-{ .mib; mov G=F
- mov F=E };;
-{ .mib; xor r9=r9,r11 // r9=Sigma1(e)
- _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28)
-{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i]
- mov E=D };;
-{ .mib; add T1=T1,r9 // T1+=Sigma1(e)
- _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34)
-{ .mib; mov D=C
- mov C=B };;
-{ .mib; add T1=T1,X[15] // T1+=X[i]
- _rotr r8=$t0,$Sigma0[2] } // ROTR(a,39)
-{ .mib; xor r10=r10,r11
- mux2 X[15]=X[15],0x44 };; // eliminated in 64-bit
-{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
- mov B=A
- add A=T1,T2 };;
-{ .mib; add E=E,T1
- add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
- br.ctop.sptk .L_first16 };;
-.L_first16_end:
-
-{ .mii; mov ar.lc=$rounds-17
- mov ar.ec=1 };;
-
-.align 32
-.L_rest:
-.rotr X[16]
-{ .mib; $LDW K=[Ktbl],$SZ
- _rotr r8=X[15-1],$sigma0[0] } // ROTR(s0,1)
-{ .mib; $ADD X[15]=X[15],X[15-9] // X[i&0xF]+=X[(i+9)&0xF]
- $SHRU s0=X[15-1],sgm0 };; // s0=X[(i+1)&0xF]>>7
-{ .mib; and T1=F,E
- _rotr r9=X[15-1],$sigma0[1] } // ROTR(s0,8)
-{ .mib; andcm r10=G,E
- $SHRU s1=X[15-14],sgm1 };; // s1=X[(i+14)&0xF]>>6
-{ .mmi; xor T1=T1,r10 // T1=((e & f) ^ (~e & g))
- xor r9=r8,r9
- _rotr r10=X[15-14],$sigma1[0] };;// ROTR(s1,19)
-{ .mib; and T2=A,B
- _rotr r11=X[15-14],$sigma1[1] }// ROTR(s1,61)
-{ .mib; and r8=A,C };;
-___
-$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
-// I adhere to mmi; in order to hold Itanium 1 back and avoid a 6-cycle
-// pipeline flush in the last bundle. Note that even on Itanium2 the
-// latter stalls for one clock cycle...
-{ .mmi; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF])
- dep.z $t1=E,32,32 }
-{ .mmi; xor r10=r11,r10
- zxt4 E=E };;
-{ .mmi; or $t1=$t1,E
- xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF])
- mux2 $t0=A,0x44 };; // copy lower half to upper
-{ .mmi; xor T2=T2,r8
- _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14)
-{ .mmi; and r10=B,C
- add T1=T1,H // T1=Ch(e,f,g)+h
- $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF])
-___
-$t0="A", $t1="E", $code.=<<___ if ($BITS==64);
-{ .mib; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF])
- _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14)
-{ .mib; xor r10=r11,r10
- xor T2=T2,r8 };;
-{ .mib; xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF])
- add T1=T1,H }
-{ .mib; and r10=B,C
- $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF])
-___
-$code.=<<___;
-{ .mmi; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c))
- mov H=G
- _rotr r8=$t1,$Sigma1[1] };; // ROTR(e,18)
-{ .mmi; xor r11=r8,r9
- $ADD X[15]=X[15],s1 // X[i&0xF]+=sigma1(X[(i+14)&0xF])
- _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41)
-{ .mmi; mov G=F
- mov F=E };;
-{ .mib; xor r9=r9,r11 // r9=Sigma1(e)
- _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28)
-{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i]
- mov E=D };;
-{ .mib; add T1=T1,r9 // T1+=Sigma1(e)
- _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34)
-{ .mib; mov D=C
- mov C=B };;
-{ .mmi; add T1=T1,X[15] // T1+=X[i]
- xor r10=r10,r11
- _rotr r8=$t0,$Sigma0[2] };; // ROTR(a,39)
-{ .mmi; xor r10=r8,r10 // r10=Sigma0(a)
- mov B=A
- add A=T1,T2 };;
-{ .mib; add E=E,T1
- add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a)
- br.ctop.sptk .L_rest };;
-.L_rest_end:
-
-{ .mmi; add A_=A_,A
- add B_=B_,B
- add C_=C_,C }
-{ .mmi; add D_=D_,D
- add E_=E_,E
- cmp.ltu p16,p0=1,num };;
-{ .mmi; add F_=F_,F
- add G_=G_,G
- add H_=H_,H }
-{ .mmb; add Ktbl=-$SZ*$rounds,Ktbl
-(p16) add num=-1,num
-(p16) br.dptk.many .L_outer };;
-
-{ .mib; add r8=0*$SZ,ctx
- add r9=1*$SZ,ctx }
-{ .mib; add r10=2*$SZ,ctx
- add r11=3*$SZ,ctx };;
-{ .mmi; $STW [r8]=A_,4*$SZ
- $STW [r9]=B_,4*$SZ
- mov ar.lc=lcsave }
-{ .mmi; $STW [r10]=C_,4*$SZ
- $STW [r11]=D_,4*$SZ
- mov pr=prsave,0x1ffff };;
-{ .mmb; $STW [r8]=E_
- $STW [r9]=F_ }
-{ .mmb; $STW [r10]=G_
- $STW [r11]=H_
- br.ret.sptk.many b0 };;
-.endp $func#
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm;
-if ($BITS==64) {
- $code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm;
- $code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian);
- $code =~ s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm
- if (!$big_endian);
- $code =~ s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm;
-}
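
The _rotr rewrite above leans on the identity that a 64-bit rotate right by
n is a shrp (shift right pair) of the value against itself. A throwaway Perl
check of that substitution on a hypothetical sample line (not taken from the
generated output):

    my $line = "{ .mib;  _rotr r9=E,14 }";
    $line =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm;
    print $line, "\n";    # prints "{ .mib;  shrp r9=E,E,14 }"
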
-
-print $code;
-
-print<<___ if ($BITS==32);
-.align 64
-.type K256#,\@object
-K256: data4 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- data4 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- data4 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- data4 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- data4 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- data4 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- data4 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- data4 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- data4 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- data4 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- data4 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- data4 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- data4 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- data4 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- data4 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- data4 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-.size K256#,$SZ*$rounds
-stringz "SHA256 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
-___
-print<<___ if ($BITS==64);
-.align 64
-.type K512#,\@object
-K512: data8 0x428a2f98d728ae22,0x7137449123ef65cd
- data8 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- data8 0x3956c25bf348b538,0x59f111f1b605d019
- data8 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- data8 0xd807aa98a3030242,0x12835b0145706fbe
- data8 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- data8 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- data8 0x9bdc06a725c71235,0xc19bf174cf692694
- data8 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- data8 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- data8 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- data8 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- data8 0x983e5152ee66dfab,0xa831c66d2db43210
- data8 0xb00327c898fb213f,0xbf597fc7beef0ee4
- data8 0xc6e00bf33da88fc2,0xd5a79147930aa725
- data8 0x06ca6351e003826f,0x142929670a0e6e70
- data8 0x27b70a8546d22ffc,0x2e1b21385c26c926
- data8 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- data8 0x650a73548baf63de,0x766a0abb3c77b2a8
- data8 0x81c2c92e47edaee6,0x92722c851482353b
- data8 0xa2bfe8a14cf10364,0xa81a664bbc423001
- data8 0xc24b8b70d0f89791,0xc76c51a30654be30
- data8 0xd192e819d6ef5218,0xd69906245565a910
- data8 0xf40e35855771202a,0x106aa07032bbd1b8
- data8 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- data8 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- data8 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- data8 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- data8 0x748f82ee5defb2fc,0x78a5636f43172f60
- data8 0x84c87814a1f0ab72,0x8cc702081a6439ec
- data8 0x90befffa23631e28,0xa4506cebde82bde9
- data8 0xbef9a3f7b2c67915,0xc67178f2e372532b
- data8 0xca273eceea26619c,0xd186b8c721c0c207
- data8 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- data8 0x06f067aa72176fba,0x0a637dc5a2c898a6
- data8 0x113f9804bef90dae,0x1b710b35131c471b
- data8 0x28db77f523047d84,0x32caab7b40c72493
- data8 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- data8 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- data8 0x5fcb6fab3ad6faec,0x6c44198c4a475817
-.size K512#,$SZ*$rounds
-stringz "SHA512 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
-___
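
Stripped of the bundle scheduling, what the .L_first16/.L_rest loops above
compute is the standard SHA-2 round. A minimal Perl sketch of one SHA-256
round (sha2_round and ror32 are illustrative names, not part of the module;
assumes a 64-bit perl; the SHA-512 variant differs only in word width and in
the rotation counts 14/18/41 and 28/34/39 annotated in the comments above):

    sub ror32 { my ($x,$n)=@_; (($x>>$n) | ($x<<(32-$n))) & 0xffffffff }

    sub sha2_round {
        my ($K, $W, $a,$b,$c,$d,$e,$f,$g,$h) = @_;
        my $Ch   = ($e & $f) ^ (~$e & $g);               # Ch(e,f,g)
        my $Maj  = ($a & $b) ^ ($a & $c) ^ ($b & $c);    # Maj(a,b,c)
        my $Sig1 = ror32($e,6) ^ ror32($e,11) ^ ror32($e,25);
        my $Sig0 = ror32($a,2) ^ ror32($a,13) ^ ror32($a,22);
        my $T1   = ($h + $Ch + $K + $W + $Sig1) & 0xffffffff;
        my $T2   = ($Sig0 + $Maj) & 0xffffffff;
        # new state: a=T1+T2, e=d+T1, the rest just shifts down
        return (($T1+$T2) & 0xffffffff, $a, $b, $c,
                ($d+$T1) & 0xffffffff, $e, $f, $g);
    }
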
diff --git a/app/openssl/crypto/sha/asm/sha512-mips.pl b/app/openssl/crypto/sha/asm/sha512-mips.pl
deleted file mode 100644
index ffa053bb..00000000
--- a/app/openssl/crypto/sha/asm/sha512-mips.pl
+++ /dev/null
@@ -1,455 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA2 block procedures for MIPS.
-
-# October 2010.
-#
-# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
-# generated code in the o32 build and ~55% in the n32/64 build. SHA512
-# [which for now can only be compiled for the MIPS64 ISA] improvement is
-# a modest ~17%, but it comes for free, because it's the same instruction
-# sequence.
-# Improvement coefficients are for aligned input.
-
-######################################################################
-# There are a number of MIPS ABIs in use, O32 and N32/64 being the most
-# widely used. Then there is a new contender: NUBI. It appears that if
-# one picks the latter, it's possible to arrange code in an ABI-neutral
-# manner. Therefore let's stick to the NUBI register layout:
-#
-($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-#
-# The return value is placed in $a0. The following coding rules facilitate
-# interoperability:
-#
-# - never ever touch $tp, "thread pointer", former $gp [o32 can be
-# excluded from the rule, because it's specified volatile];
-# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-# old code];
-# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-#
-# For reference here is register layout for N32/64 MIPS ABIs:
-#
-# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-#
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-
-if ($flavour =~ /64|n32/i) {
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
- $REG_S="sd";
- $REG_L="ld";
- $PTR_SLL="dsll"; # incidentally works even on n32
- $SZREG=8;
-} else {
- $PTR_ADD="add";
- $PTR_SUB="sub";
- $REG_S="sw";
- $REG_L="lw";
- $PTR_SLL="sll";
- $SZREG=4;
-}
-$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
-#
-# <appro@openssl.org>
-#
-######################################################################
-
-$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-
-for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
-open STDOUT,">$output";
-
-if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
-
-if ($output =~ /512/) {
- $label="512";
- $SZ=8;
- $LD="ld"; # load from memory
- $ST="sd"; # store to memory
- $SLL="dsll"; # shift left logical
- $SRL="dsrl"; # shift right logical
- $ADDU="daddu";
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=( 7, 1, 8); # right shift first
- @sigma1=( 6,19,61); # right shift first
- $lastK=0x817;
- $rounds=80;
-} else {
- $label="256";
- $SZ=4;
- $LD="lw"; # load from memory
- $ST="sw"; # store to memory
- $SLL="sll"; # shift left logical
- $SRL="srl"; # shift right logical
- $ADDU="addu";
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 3, 7,18); # right shift first
- @sigma1=(10,17,19); # right shift first
- $lastK=0x8f2;
- $rounds=64;
-}
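
Classic MIPS has no rotate instruction, so every ROTR in the Sigma/sigma
functions below is synthesized from an $SRL/$SLL pair over the $SZ*8-bit
word. A hedged Perl model of that identity (rotr is an illustrative name;
assumes a 64-bit perl):

    sub rotr {                       # ROTR via SRL/SLL, as in BODY_00_15
        my ($x, $n, $bits) = @_;     # $bits is $SZ*8, i.e. 32 or 64
        my $mask = ($bits == 64) ? 0xffffffffffffffff : ((1 << $bits) - 1);
        return ((($x >> $n) | ($x << ($bits - $n))) & $mask);
    }
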
-
-$MSB = $big_endian ? 0 : ($SZ-1);
-$LSB = ($SZ-1)&~$MSB;
-
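These offsets feed the unaligned ${LD}l/${LD}r load pairs used below: the
"left" load always touches the most significant part of the word and the
"right" load the least significant part, whatever the byte order. A quick
Perl check of the two formulas (illustrative only):

    for my $be (1, 0) {
        for my $SZ (4, 8) {
            my $MSB = $be ? 0 : ($SZ-1);
            my $LSB = ($SZ-1) & ~$MSB;
            printf "SZ=%d %s: MSB=%d LSB=%d\n",
                   $SZ, $be ? "BE" : "LE", $MSB, $LSB;
        }
    }   # BE gives (0,$SZ-1), LE gives ($SZ-1,0)
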
-@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31));
-@X=map("\$$_",(8..23));
-
-$ctx=$a0;
-$inp=$a1;
-$len=$a2; $Ktbl=$len;
-
-sub BODY_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);
-
-$code.=<<___ if ($i<15);
- ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp)
- ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
-___
-$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
- srl $tmp0,@X[0],24 # byte swap($i)
- srl $tmp1,@X[0],8
- andi $tmp2,@X[0],0xFF00
- sll @X[0],@X[0],24
- andi $tmp1,0xFF00
- sll $tmp2,$tmp2,8
- or @X[0],$tmp0
- or $tmp1,$tmp2
- or @X[0],$tmp1
-___
-$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
- ori $tmp0,$zero,0xFF
- dsll $tmp2,$tmp0,32
- or $tmp0,$tmp2 # 0x000000FF000000FF
- and $tmp1,@X[0],$tmp0 # byte swap($i)
- dsrl $tmp2,@X[0],24
- dsll $tmp1,24
- and $tmp2,$tmp0
- dsll $tmp0,8 # 0x0000FF000000FF00
- or $tmp1,$tmp2
- and $tmp2,@X[0],$tmp0
- dsrl @X[0],8
- dsll $tmp2,8
- and @X[0],$tmp0
- or $tmp1,$tmp2
- or @X[0],$tmp1
- dsrl $tmp1,@X[0],32
- dsll @X[0],32
- or @X[0],$tmp1
-___
-$code.=<<___;
- $ADDU $T1,$X[0],$h # $i
- $SRL $h,$e,@Sigma1[0]
- xor $tmp2,$f,$g
- $SLL $tmp1,$e,`$SZ*8-@Sigma1[2]`
- and $tmp2,$e
- $SRL $tmp0,$e,@Sigma1[1]
- xor $h,$tmp1
- $SLL $tmp1,$e,`$SZ*8-@Sigma1[1]`
- xor $h,$tmp0
- $SRL $tmp0,$e,@Sigma1[2]
- xor $h,$tmp1
- $SLL $tmp1,$e,`$SZ*8-@Sigma1[0]`
- xor $h,$tmp0
- xor $tmp2,$g # Ch(e,f,g)
- xor $tmp0,$tmp1,$h # Sigma1(e)
-
- $SRL $h,$a,@Sigma0[0]
- $ADDU $T1,$tmp2
- $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
- $SLL $tmp1,$a,`$SZ*8-@Sigma0[2]`
- $ADDU $T1,$tmp0
- $SRL $tmp0,$a,@Sigma0[1]
- xor $h,$tmp1
- $SLL $tmp1,$a,`$SZ*8-@Sigma0[1]`
- xor $h,$tmp0
- $SRL $tmp0,$a,@Sigma0[2]
- xor $h,$tmp1
- $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
- xor $h,$tmp0
- $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
- xor $h,$tmp1 # Sigma0(a)
-
- or $tmp0,$a,$b
- and $tmp1,$a,$b
- and $tmp0,$c
- or $tmp1,$tmp0 # Maj(a,b,c)
- $ADDU $T1,$tmp2 # +=K[$i]
- $ADDU $h,$tmp1
-
- $ADDU $d,$T1
- $ADDU $h,$T1
-___
-$code.=<<___ if ($i>=13);
- $LD @X[3],`(($i+3)%16)*$SZ`($sp) # prefetch from ring buffer
-___
-}
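
One detail worth calling out in BODY_00_15: Maj(a,b,c) is computed with an
or/and pair, (a & b) | ((a | b) & c), instead of the textbook three-way xor,
saving a temporary. The two forms are equivalent; a throwaway Perl check
(illustrative, not part of the module):

    for my $a (0,1) { for my $b (0,1) { for my $c (0,1) {
        my $maj   = ($a & $b) ^ ($a & $c) ^ ($b & $c);   # textbook Maj
        my $trick = ($a & $b) | (($a | $b) & $c);        # form used above
        die "Maj mismatch" unless $maj == $trick;
    }}}
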
-
-sub BODY_16_XX {
-my $i=@_[0];
-my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
-
-$code.=<<___;
- $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
- $ADDU @X[0],@X[9] # +=X[i+9]
- $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
- $SRL $tmp0,@X[1],@sigma0[1]
- xor $tmp2,$tmp1
- $SLL $tmp1,`@sigma0[2]-@sigma0[1]`
- xor $tmp2,$tmp0
- $SRL $tmp0,@X[1],@sigma0[2]
- xor $tmp2,$tmp1
-
- $SRL $tmp3,@X[14],@sigma1[0]
- xor $tmp2,$tmp0 # sigma0(X[i+1])
- $SLL $tmp1,@X[14],`$SZ*8-@sigma1[2]`
- $ADDU @X[0],$tmp2
- $SRL $tmp0,@X[14],@sigma1[1]
- xor $tmp3,$tmp1
- $SLL $tmp1,`@sigma1[2]-@sigma1[1]`
- xor $tmp3,$tmp0
- $SRL $tmp0,@X[14],@sigma1[2]
- xor $tmp3,$tmp1
-
- xor $tmp3,$tmp0 # sigma1(X[i+14])
- $ADDU @X[0],$tmp3
-___
- &BODY_00_15(@_);
-}
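
BODY_16_XX is the standard SHA-2 message expansion,
X[i&15] += X[(i+9)&15] + sigma0(X[(i+1)&15]) + sigma1(X[(i+14)&15]), with
each sigma built from two synthesized rotates plus one genuine right shift
(hence the "right shift first" ordering of @sigma0/@sigma1 above). Reusing
the rotr sketch from earlier, a hedged Perl rendering:

    sub sigma {                      # shift counts as in @sigma0/@sigma1
        my ($x, $shr, $rot1, $rot2, $bits) = @_;   # right shift first
        return (($x >> $shr) ^ rotr($x,$rot1,$bits) ^ rotr($x,$rot2,$bits));
    }
    # e.g. SHA-512: sigma0(w) = sigma($w, 7, 1, 8, 64)
    #               sigma1(w) = sigma($w, 6, 19, 61, 64)
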
-
-$FRAMESIZE=16*$SZ+16*$SZREG;
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-
-$code.=<<___;
-#ifdef OPENSSL_FIPSCANISTER
-# include <openssl/fipssyms.h>
-#endif
-
-.text
-.set noat
-#if !defined(__vxworks) || defined(__pic__)
-.option pic2
-#endif
-
-.align 5
-.globl sha${label}_block_data_order
-.ent sha${label}_block_data_order
-sha${label}_block_data_order:
- .frame $sp,$FRAMESIZE,$ra
- .mask $SAVED_REGS_MASK,-$SZREG
- .set noreorder
-___
-$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
- .cpload $pf
-___
-$code.=<<___;
- $PTR_SUB $sp,$FRAMESIZE
- $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
- $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
- $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
- $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
- $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
- $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
- $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
- $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
- $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
- $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
-___
-$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
- $REG_S $s3,$FRAMESIZE-11*$SZREG($sp)
- $REG_S $s2,$FRAMESIZE-12*$SZREG($sp)
- $REG_S $s1,$FRAMESIZE-13*$SZREG($sp)
- $REG_S $s0,$FRAMESIZE-14*$SZREG($sp)
- $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
-___
-$code.=<<___;
- $PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
-___
-$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
- .cplocal $Ktbl
- .cpsetup $pf,$zero,sha${label}_block_data_order
-___
-$code.=<<___;
- .set reorder
- la $Ktbl,K${label} # PIC-ified 'load address'
-
- $LD $A,0*$SZ($ctx) # load context
- $LD $B,1*$SZ($ctx)
- $LD $C,2*$SZ($ctx)
- $LD $D,3*$SZ($ctx)
- $LD $E,4*$SZ($ctx)
- $LD $F,5*$SZ($ctx)
- $LD $G,6*$SZ($ctx)
- $LD $H,7*$SZ($ctx)
-
- $PTR_ADD @X[15],$inp # pointer to the end of input
- $REG_S @X[15],16*$SZ($sp)
- b .Loop
-
-.align 5
-.Loop:
- ${LD}l @X[0],$MSB($inp)
- ${LD}r @X[0],$LSB($inp)
-___
-for ($i=0;$i<16;$i++)
-{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-$code.=<<___;
- b .L16_xx
-.align 4
-.L16_xx:
-___
-for (;$i<32;$i++)
-{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-$code.=<<___;
- and @X[6],0xfff
- li @X[7],$lastK
- .set noreorder
- bne @X[6],@X[7],.L16_xx
- $PTR_ADD $Ktbl,16*$SZ # Ktbl+=16
-
- $REG_L @X[15],16*$SZ($sp) # restore pointer to the end of input
- $LD @X[0],0*$SZ($ctx)
- $LD @X[1],1*$SZ($ctx)
- $LD @X[2],2*$SZ($ctx)
- $PTR_ADD $inp,16*$SZ
- $LD @X[3],3*$SZ($ctx)
- $ADDU $A,@X[0]
- $LD @X[4],4*$SZ($ctx)
- $ADDU $B,@X[1]
- $LD @X[5],5*$SZ($ctx)
- $ADDU $C,@X[2]
- $LD @X[6],6*$SZ($ctx)
- $ADDU $D,@X[3]
- $LD @X[7],7*$SZ($ctx)
- $ADDU $E,@X[4]
- $ST $A,0*$SZ($ctx)
- $ADDU $F,@X[5]
- $ST $B,1*$SZ($ctx)
- $ADDU $G,@X[6]
- $ST $C,2*$SZ($ctx)
- $ADDU $H,@X[7]
- $ST $D,3*$SZ($ctx)
- $ST $E,4*$SZ($ctx)
- $ST $F,5*$SZ($ctx)
- $ST $G,6*$SZ($ctx)
- $ST $H,7*$SZ($ctx)
-
- bne $inp,@X[15],.Loop
- $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl
-
- $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
- $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
- $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
- $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
- $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
- $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
- $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
- $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
- $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
- $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
-___
-$code.=<<___ if ($flavour =~ /nubi/i);
- $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
- $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
- $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
- $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
- $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-___
-$code.=<<___;
- jr $ra
- $PTR_ADD $sp,$FRAMESIZE
-.end sha${label}_block_data_order
-
-.rdata
-.align 5
-K${label}:
-___
-if ($SZ==4) {
-$code.=<<___;
- .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
- .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
- .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
- .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
- .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
- .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
- .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
- .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
- .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
- .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
- .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
- .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
- .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
- .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
- .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
- .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-___
-} else {
-$code.=<<___;
- .dword 0x428a2f98d728ae22, 0x7137449123ef65cd
- .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
- .dword 0x3956c25bf348b538, 0x59f111f1b605d019
- .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
- .dword 0xd807aa98a3030242, 0x12835b0145706fbe
- .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
- .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
- .dword 0x9bdc06a725c71235, 0xc19bf174cf692694
- .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
- .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
- .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
- .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
- .dword 0x983e5152ee66dfab, 0xa831c66d2db43210
- .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4
- .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725
- .dword 0x06ca6351e003826f, 0x142929670a0e6e70
- .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926
- .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
- .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8
- .dword 0x81c2c92e47edaee6, 0x92722c851482353b
- .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001
- .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30
- .dword 0xd192e819d6ef5218, 0xd69906245565a910
- .dword 0xf40e35855771202a, 0x106aa07032bbd1b8
- .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
- .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
- .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
- .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
- .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60
- .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec
- .dword 0x90befffa23631e28, 0xa4506cebde82bde9
- .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b
- .dword 0xca273eceea26619c, 0xd186b8c721c0c207
- .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
- .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6
- .dword 0x113f9804bef90dae, 0x1b710b35131c471b
- .dword 0x28db77f523047d84, 0x32caab7b40c72493
- .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
- .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
- .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-___
-}
-$code.=<<___;
-.asciiz "SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-.align 5
-
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
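
On little-endian targets the 32-bit path byte-swaps each freshly loaded word
with the srl/sll/andi sequence at the top of BODY_00_15 (the 64-bit path
plays the same game with dsll/dsrl and a 0x000000FF000000FF mask). The net
effect of the 32-bit sequence, as a Perl sketch (bswap32 is an illustrative
name; assumes a 64-bit perl):

    sub bswap32 {
        my $x = shift;
        return ((($x >> 24) & 0x000000ff) | (($x >>  8) & 0x0000ff00) |
                (($x <<  8) & 0x00ff0000) | (($x << 24) & 0xff000000));
    }
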
diff --git a/app/openssl/crypto/sha/asm/sha512-parisc.pl b/app/openssl/crypto/sha/asm/sha512-parisc.pl
deleted file mode 100755
index fc0e15b3..00000000
--- a/app/openssl/crypto/sha/asm/sha512-parisc.pl
+++ /dev/null
@@ -1,793 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA256/512 block procedure for PA-RISC.
-
-# June 2009.
-#
-# SHA256 performance is >75% better than gcc 3.2 generated code on
-# PA-7100LC. Compared to code generated by the vendor compiler this
-# implementation is almost 70% faster in the 64-bit build, but delivers
-# virtually the same performance in the 32-bit build on PA-8600.
-#
-# SHA512 performance is >2.9x better than gcc 3.2 generated code on
-# PA-7100LC, a PA-RISC 1.1 processor. The implementation then detects
-# whether the code is executed on a PA-RISC 2.0 processor and switches
-# to the 64-bit code path, delivering adequate performance even in a
-# "blended" 32-bit build. Though the 64-bit code is not any faster than
-# code generated by the vendor compiler on PA-8600...
-#
-# Special thanks to polarhome.com for providing HP-UX account.
-
-$flavour = shift;
-$output = shift;
-open STDOUT,">$output";
-
-if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
-} else {
- $LEVEL ="1.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
-}
-
-if ($output =~ /512/) {
- $func="sha512_block_data_order";
- $SZ=8;
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=(1, 8, 7);
- @sigma1=(19,61, 6);
- $rounds=80;
- $LAST10BITS=0x017;
- $LD="ldd";
- $LDM="ldd,ma";
- $ST="std";
-} else {
- $func="sha256_block_data_order";
- $SZ=4;
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 7,18, 3);
- @sigma1=(17,19,10);
- $rounds=64;
- $LAST10BITS=0x0f2;
- $LD="ldw";
- $LDM="ldwm";
- $ST="stw";
-}
-
-$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
- # [+ argument transfer]
-$XOFF=16*$SZ+32; # local variables
-$FRAME+=$XOFF;
-$XOFF+=$FRAME_MARKER; # distance between %sp and local variables
-
-$ctx="%r26"; # zapped by $a0
-$inp="%r25"; # zapped by $a1
-$num="%r24"; # zapped by $t0
-
-$a0 ="%r26";
-$a1 ="%r25";
-$t0 ="%r24";
-$t1 ="%r29";
-$Tbl="%r31";
-
-@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
-
-@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
- "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
-
-sub ROUND_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-$code.=<<___;
- _ror $e,$Sigma1[0],$a0
- and $f,$e,$t0
- _ror $e,$Sigma1[1],$a1
- addl $t1,$h,$h
- andcm $g,$e,$t1
- xor $a1,$a0,$a0
- _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
- or $t0,$t1,$t1 ; Ch(e,f,g)
- addl @X[$i%16],$h,$h
- xor $a0,$a1,$a1 ; Sigma1(e)
- addl $t1,$h,$h
- _ror $a,$Sigma0[0],$a0
- addl $a1,$h,$h
-
- _ror $a,$Sigma0[1],$a1
- and $a,$b,$t0
- and $a,$c,$t1
- xor $a1,$a0,$a0
- _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
- xor $t1,$t0,$t0
- and $b,$c,$t1
- xor $a0,$a1,$a1 ; Sigma0(a)
- addl $h,$d,$d
- xor $t1,$t0,$t0 ; Maj(a,b,c)
- `"$LDM $SZ($Tbl),$t1" if ($i<15)`
- addl $a1,$h,$h
- addl $t0,$h,$h
-
-___
-}
-
-sub ROUND_16_xx {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-$i-=16;
-$code.=<<___;
- _ror @X[($i+1)%16],$sigma0[0],$a0
- _ror @X[($i+1)%16],$sigma0[1],$a1
- addl @X[($i+9)%16],@X[$i],@X[$i]
- _ror @X[($i+14)%16],$sigma1[0],$t0
- _ror @X[($i+14)%16],$sigma1[1],$t1
- xor $a1,$a0,$a0
- _shr @X[($i+1)%16],$sigma0[2],$a1
- xor $t1,$t0,$t0
- _shr @X[($i+14)%16],$sigma1[2],$t1
- xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
- xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
- $LDM $SZ($Tbl),$t1
- addl $a0,@X[$i],@X[$i]
- addl $t0,@X[$i],@X[$i]
-___
-$code.=<<___ if ($i==15);
- extru $t1,31,10,$a1
- comiclr,<> $LAST10BITS,$a1,%r0
- ldo 1($Tbl),$Tbl ; signal end of $Tbl
-___
-&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
-}
-
-$code=<<___;
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
-
- .ALIGN 64
-L\$table
-___
-$code.=<<___ if ($SZ==8);
- .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
- .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
- .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
- .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
- .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
- .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
- .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
- .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
- .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
- .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
- .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
- .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
- .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
- .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
- .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
- .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
- .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
- .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
- .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
- .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
- .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
- .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
- .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
- .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
- .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
- .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
- .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
- .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
- .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
- .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
- .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
- .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
- .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
- .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
- .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
- .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
- .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
- .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
- .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
- .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
-___
-$code.=<<___ if ($SZ==4);
- .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-___
-$code.=<<___;
-
- .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
- .ALIGN 64
-$func
- .PROC
- .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
- $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
- $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
- $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
- $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
- $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
- $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
- $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
-
- _shl $num,`log(16*$SZ)/log(2)`,$num
- addl $inp,$num,$num ; $num to point at the end of $inp
-
- $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
- $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
- $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
-
- blr %r0,$Tbl
- ldi 3,$t1
-L\$pic
- andcm $Tbl,$t1,$Tbl ; wipe privilege level
- ldo L\$table-L\$pic($Tbl),$Tbl
-___
-$code.=<<___ if ($SZ==8 && $SIZE_T==4);
- ldi 31,$t1
- mtctl $t1,%cr11
- extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
- b L\$parisc1
- nop
-___
-$code.=<<___;
- $LD `0*$SZ`($ctx),$A ; load context
- $LD `1*$SZ`($ctx),$B
- $LD `2*$SZ`($ctx),$C
- $LD `3*$SZ`($ctx),$D
- $LD `4*$SZ`($ctx),$E
- $LD `5*$SZ`($ctx),$F
- $LD `6*$SZ`($ctx),$G
- $LD `7*$SZ`($ctx),$H
-
- extru $inp,31,`log($SZ)/log(2)`,$t0
- sh3addl $t0,%r0,$t0
- subi `8*$SZ`,$t0,$t0
- mtctl $t0,%cr11 ; load %sar with align factor
-
-L\$oop
- ldi `$SZ-1`,$t0
- $LDM $SZ($Tbl),$t1
- andcm $inp,$t0,$t0 ; align $inp
-___
- for ($i=0;$i<15;$i++) { # load input block
- $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
-$code.=<<___;
- cmpb,*= $inp,$t0,L\$aligned
- $LD `$SZ*15`($t0),@X[15]
- $LD `$SZ*16`($t0),@X[16]
-___
- for ($i=0;$i<16;$i++) { # align data
- $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
-$code.=<<___;
-L\$aligned
- nop ; otherwise /usr/ccs/bin/as is confused by the .WORD below
-___
-
-for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
-L\$rounds
- nop ; otherwise /usr/ccs/bin/as is confused by the .WORD below
-___
-for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
- nop
-
- $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
- $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
- $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
- ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
-
- $LD `0*$SZ`($ctx),@X[0] ; load context
- $LD `1*$SZ`($ctx),@X[1]
- $LD `2*$SZ`($ctx),@X[2]
- $LD `3*$SZ`($ctx),@X[3]
- $LD `4*$SZ`($ctx),@X[4]
- $LD `5*$SZ`($ctx),@X[5]
- addl @X[0],$A,$A
- $LD `6*$SZ`($ctx),@X[6]
- addl @X[1],$B,$B
- $LD `7*$SZ`($ctx),@X[7]
- ldo `16*$SZ`($inp),$inp ; advance $inp
-
- $ST $A,`0*$SZ`($ctx) ; save context
- addl @X[2],$C,$C
- $ST $B,`1*$SZ`($ctx)
- addl @X[3],$D,$D
- $ST $C,`2*$SZ`($ctx)
- addl @X[4],$E,$E
- $ST $D,`3*$SZ`($ctx)
- addl @X[5],$F,$F
- $ST $E,`4*$SZ`($ctx)
- addl @X[6],$G,$G
- $ST $F,`5*$SZ`($ctx)
- addl @X[7],$H,$H
- $ST $G,`6*$SZ`($ctx)
- $ST $H,`7*$SZ`($ctx)
-
- cmpb,*<>,n $inp,$num,L\$oop
- $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
-___
-if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
-{{
-$code.=<<___;
- b L\$done
- nop
-
- .ALIGN 64
-L\$parisc1
-___
-
-@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
- $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
- ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
- "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
-$a0 ="%r17";
-$a1 ="%r18";
-$a2 ="%r19";
-$a3 ="%r20";
-$t0 ="%r21";
-$t1 ="%r22";
-$t2 ="%r28";
-$t3 ="%r29";
-$Tbl="%r31";
-
-@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
-
-sub ROUND_00_15_pa1 {
-my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
- $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
-my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
-
-$code.=<<___ if (!$flag);
- ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
- ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
-___
-$code.=<<___;
- shd $ehi,$elo,$Sigma1[0],$t0
- add $Xlo,$hlo,$hlo
- shd $elo,$ehi,$Sigma1[0],$t1
- addc $Xhi,$hhi,$hhi ; h += X[i]
- shd $ehi,$elo,$Sigma1[1],$t2
- ldwm 8($Tbl),$Xhi
- shd $elo,$ehi,$Sigma1[1],$t3
- ldw -4($Tbl),$Xlo ; load K[i]
- xor $t2,$t0,$t0
- xor $t3,$t1,$t1
- and $flo,$elo,$a0
- and $fhi,$ehi,$a1
- shd $ehi,$elo,$Sigma1[2],$t2
- andcm $glo,$elo,$a2
- shd $elo,$ehi,$Sigma1[2],$t3
- andcm $ghi,$ehi,$a3
- xor $t2,$t0,$t0
- xor $t3,$t1,$t1 ; Sigma1(e)
- add $Xlo,$hlo,$hlo
- xor $a2,$a0,$a0
- addc $Xhi,$hhi,$hhi ; h += K[i]
- xor $a3,$a1,$a1 ; Ch(e,f,g)
-
- add $t0,$hlo,$hlo
- shd $ahi,$alo,$Sigma0[0],$t0
- addc $t1,$hhi,$hhi ; h += Sigma1(e)
- shd $alo,$ahi,$Sigma0[0],$t1
- add $a0,$hlo,$hlo
- shd $ahi,$alo,$Sigma0[1],$t2
- addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
- shd $alo,$ahi,$Sigma0[1],$t3
-
- xor $t2,$t0,$t0
- xor $t3,$t1,$t1
- shd $ahi,$alo,$Sigma0[2],$t2
- and $alo,$blo,$a0
- shd $alo,$ahi,$Sigma0[2],$t3
- and $ahi,$bhi,$a1
- xor $t2,$t0,$t0
- xor $t3,$t1,$t1 ; Sigma0(a)
-
- and $alo,$clo,$a2
- and $ahi,$chi,$a3
- xor $a2,$a0,$a0
- add $hlo,$dlo,$dlo
- xor $a3,$a1,$a1
- addc $hhi,$dhi,$dhi ; d += h
- and $blo,$clo,$a2
- add $t0,$hlo,$hlo
- and $bhi,$chi,$a3
- addc $t1,$hhi,$hhi ; h += Sigma0(a)
- xor $a2,$a0,$a0
- add $a0,$hlo,$hlo
- xor $a3,$a1,$a1 ; Maj(a,b,c)
- addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
-
-___
-$code.=<<___ if ($i==15 && $flag);
- extru $Xlo,31,10,$Xlo
- comiclr,= $LAST10BITS,$Xlo,%r0
- b L\$rounds_pa1
- nop
-___
-push(@X,shift(@X)); push(@X,shift(@X));
-}
-
-sub ROUND_16_xx_pa1 {
-my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
-my ($i)=shift;
-$i-=16;
-$code.=<<___;
- ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
- ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
- ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
- ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
- ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
- ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
- shd $Xnhi,$Xnlo,$sigma0[0],$t0
- shd $Xnlo,$Xnhi,$sigma0[0],$t1
- add $a0,$Xlo,$Xlo
- shd $Xnhi,$Xnlo,$sigma0[1],$t2
- addc $a1,$Xhi,$Xhi
- shd $Xnlo,$Xnhi,$sigma0[1],$t3
- xor $t2,$t0,$t0
- shd $Xnhi,$Xnlo,$sigma0[2],$t2
- xor $t3,$t1,$t1
- extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
- xor $t2,$t0,$t0
- shd $a3,$a2,$sigma1[0],$a0
- xor $t3,$t1,$t1 ; sigma0(X[(i+1)&0x0f])
- shd $a2,$a3,$sigma1[0],$a1
- add $t0,$Xlo,$Xlo
- shd $a3,$a2,$sigma1[1],$t2
- addc $t1,$Xhi,$Xhi
- shd $a2,$a3,$sigma1[1],$t3
- xor $t2,$a0,$a0
- shd $a3,$a2,$sigma1[2],$t2
- xor $t3,$a1,$a1
- extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
- xor $t2,$a0,$a0
- xor $t3,$a1,$a1 ; sigma1(X[(i+14)&0x0f])
- add $a0,$Xlo,$Xlo
- addc $a1,$Xhi,$Xhi
-
- stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
- stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
-___
-&ROUND_00_15_pa1($i,@_,1);
-}
-$code.=<<___;
- ldw `0*4`($ctx),$Ahi ; load context
- ldw `1*4`($ctx),$Alo
- ldw `2*4`($ctx),$Bhi
- ldw `3*4`($ctx),$Blo
- ldw `4*4`($ctx),$Chi
- ldw `5*4`($ctx),$Clo
- ldw `6*4`($ctx),$Dhi
- ldw `7*4`($ctx),$Dlo
- ldw `8*4`($ctx),$Ehi
- ldw `9*4`($ctx),$Elo
- ldw `10*4`($ctx),$Fhi
- ldw `11*4`($ctx),$Flo
- ldw `12*4`($ctx),$Ghi
- ldw `13*4`($ctx),$Glo
- ldw `14*4`($ctx),$Hhi
- ldw `15*4`($ctx),$Hlo
-
- extru $inp,31,2,$t0
- sh3addl $t0,%r0,$t0
- subi 32,$t0,$t0
- mtctl $t0,%cr11 ; load %sar with align factor
-
-L\$oop_pa1
- extru $inp,31,2,$a3
- comib,= 0,$a3,L\$aligned_pa1
- sub $inp,$a3,$inp
-
- ldw `0*4`($inp),$X[0]
- ldw `1*4`($inp),$X[1]
- ldw `2*4`($inp),$t2
- ldw `3*4`($inp),$t3
- ldw `4*4`($inp),$a0
- ldw `5*4`($inp),$a1
- ldw `6*4`($inp),$a2
- ldw `7*4`($inp),$a3
- vshd $X[0],$X[1],$X[0]
- vshd $X[1],$t2,$X[1]
- stw $X[0],`-$XOFF+0*4`(%sp)
- ldw `8*4`($inp),$t0
- vshd $t2,$t3,$t2
- stw $X[1],`-$XOFF+1*4`(%sp)
- ldw `9*4`($inp),$t1
- vshd $t3,$a0,$t3
-___
-{
-my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
-for ($i=2;$i<=(128/4-8);$i++) {
-$code.=<<___;
- stw $t[0],`-$XOFF+$i*4`(%sp)
- ldw `(8+$i)*4`($inp),$t[0]
- vshd $t[1],$t[2],$t[1]
-___
-push(@t,shift(@t));
-}
-for (;$i<(128/4-1);$i++) {
-$code.=<<___;
- stw $t[0],`-$XOFF+$i*4`(%sp)
- vshd $t[1],$t[2],$t[1]
-___
-push(@t,shift(@t));
-}
-$code.=<<___;
- b L\$collected_pa1
- stw $t[0],`-$XOFF+$i*4`(%sp)
-
-___
-}
-$code.=<<___;
-L\$aligned_pa1
- ldw `0*4`($inp),$X[0]
- ldw `1*4`($inp),$X[1]
- ldw `2*4`($inp),$t2
- ldw `3*4`($inp),$t3
- ldw `4*4`($inp),$a0
- ldw `5*4`($inp),$a1
- ldw `6*4`($inp),$a2
- ldw `7*4`($inp),$a3
- stw $X[0],`-$XOFF+0*4`(%sp)
- ldw `8*4`($inp),$t0
- stw $X[1],`-$XOFF+1*4`(%sp)
- ldw `9*4`($inp),$t1
-___
-{
-my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
-for ($i=2;$i<(128/4-8);$i++) {
-$code.=<<___;
- stw $t[0],`-$XOFF+$i*4`(%sp)
- ldw `(8+$i)*4`($inp),$t[0]
-___
-push(@t,shift(@t));
-}
-for (;$i<128/4;$i++) {
-$code.=<<___;
- stw $t[0],`-$XOFF+$i*4`(%sp)
-___
-push(@t,shift(@t));
-}
-$code.="L\$collected_pa1\n";
-}
-
-for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
-$code.="L\$rounds_pa1\n";
-for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
-
-$code.=<<___;
- $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
- $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
- $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
- ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
-
- ldw `0*4`($ctx),$t1 ; update context
- ldw `1*4`($ctx),$t0
- ldw `2*4`($ctx),$t3
- ldw `3*4`($ctx),$t2
- ldw `4*4`($ctx),$a1
- ldw `5*4`($ctx),$a0
- ldw `6*4`($ctx),$a3
- add $t0,$Alo,$Alo
- ldw `7*4`($ctx),$a2
- addc $t1,$Ahi,$Ahi
- ldw `8*4`($ctx),$t1
- add $t2,$Blo,$Blo
- ldw `9*4`($ctx),$t0
- addc $t3,$Bhi,$Bhi
- ldw `10*4`($ctx),$t3
- add $a0,$Clo,$Clo
- ldw `11*4`($ctx),$t2
- addc $a1,$Chi,$Chi
- ldw `12*4`($ctx),$a1
- add $a2,$Dlo,$Dlo
- ldw `13*4`($ctx),$a0
- addc $a3,$Dhi,$Dhi
- ldw `14*4`($ctx),$a3
- add $t0,$Elo,$Elo
- ldw `15*4`($ctx),$a2
- addc $t1,$Ehi,$Ehi
- stw $Ahi,`0*4`($ctx)
- add $t2,$Flo,$Flo
- stw $Alo,`1*4`($ctx)
- addc $t3,$Fhi,$Fhi
- stw $Bhi,`2*4`($ctx)
- add $a0,$Glo,$Glo
- stw $Blo,`3*4`($ctx)
- addc $a1,$Ghi,$Ghi
- stw $Chi,`4*4`($ctx)
- add $a2,$Hlo,$Hlo
- stw $Clo,`5*4`($ctx)
- addc $a3,$Hhi,$Hhi
- stw $Dhi,`6*4`($ctx)
- ldo `16*$SZ`($inp),$inp ; advance $inp
- stw $Dlo,`7*4`($ctx)
- stw $Ehi,`8*4`($ctx)
- stw $Elo,`9*4`($ctx)
- stw $Fhi,`10*4`($ctx)
- stw $Flo,`11*4`($ctx)
- stw $Ghi,`12*4`($ctx)
- stw $Glo,`13*4`($ctx)
- stw $Hhi,`14*4`($ctx)
- comb,= $inp,$num,L\$done
- stw $Hlo,`15*4`($ctx)
- b L\$oop_pa1
- $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
-L\$done
-___
-}}
-$code.=<<___;
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
- $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
- $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
- $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
- $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
- $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
- $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
- $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
- .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
-___
-
-# Explicitly encode PA-RISC 2.0 instructions used in this module, so
-# that it can be compiled with .LEVEL 1.0. It should be noted that I
-# wouldn't have to do this if GNU assembler understood the .ALLOW 2.0
-# directive...
-
-my $ldd = sub {
- my ($mod,$args) = @_;
- my $orig = "ldd$mod\t$args";
-
- if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
- { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
- $opcode|=(1<<3) if ($mod =~ /^,m/);
- $opcode|=(1<<2) if ($mod =~ /^,mb/);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $std = sub {
- my ($mod,$args) = @_;
- my $orig = "std$mod\t$args";
-
- if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
- { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $extrd = sub {
- my ($mod,$args) = @_;
- my $orig = "extrd$mod\t$args";
-
- # I only have ",u" completer, it's implicitly encoded...
- if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
- { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
- my $len=32-$3;
- $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
- $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
- { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
- my $len=32-$2;
- $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
- $opcode |= (1<<13) if ($mod =~ /,\**=/);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $shrpd = sub {
- my ($mod,$args) = @_;
- my $orig = "shrpd$mod\t$args";
-
- if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
- { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
- my $cpos=63-$3;
- $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
- { sprintf "\t.WORD\t0x%08x\t; %s",
- (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
- }
- else { "\t".$orig; }
-};
-
-sub assemble {
- my ($mnemonic,$mod,$args)=@_;
- my $opcode = eval("\$$mnemonic");
-
- ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
-}
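
Taken together, the $ldd/$std/$extrd/$shrpd closures and the assemble()
dispatcher above let the post-processing loop below hand-assemble the few
PA-RISC 2.0 instructions into .WORD directives. A sketch of the effect on
one hypothetical generated line (the operands are made up; the encoding
follows the format-3 packing in $ldd):

    print assemble("ldd", "", "8(%r31),%r22"), "\n";
    # emits roughly:  .WORD 0x53f60010  ; ldd 8(%r31),%r22
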
-
-foreach (split("\n",$code)) {
- s/\`([^\`]*)\`/eval $1/ge;
-
- s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
- $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
- : sprintf("shd\t%$1,%$2,%d",$3)/e or
- # translate made-up instructions: _ror, _shr, _align, _shl
- s/_ror(\s+)(%r[0-9]+),/
- ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
-
- s/_shr(\s+%r[0-9]+),([0-9]+),/
- $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
- : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
-
- s/_align(\s+%r[0-9]+,%r[0-9]+),/
- ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
-
- s/_shl(\s+%r[0-9]+),([0-9]+),/
- $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
- : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
-
- s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
-
- s/cmpb,\*/comb,/ if ($SIZE_T==4);
-
- s/\bbv\b/bve/ if ($SIZE_T==8);
-
- print $_,"\n";
-}
-
-close STDOUT;
diff --git a/app/openssl/crypto/sha/asm/sha512-ppc.pl b/app/openssl/crypto/sha/asm/sha512-ppc.pl
deleted file mode 100755
index 6b44a68e..00000000
--- a/app/openssl/crypto/sha/asm/sha512-ppc.pl
+++ /dev/null
@@ -1,460 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# I let hardware handle unaligned input, except on page boundaries
-# (see below for details). Otherwise it is a straightforward
-# implementation with the X vector in the register bank. The module is
-# big-endian [which is no big deal, as there are no little-endian
-# targets left around].
-
-# sha256 | sha512
-# -m64 -m32 | -m64 -m32
-# --------------------------------------+-----------------------
-# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*)
-# Power6,xlc-7 +150% +90% | +100% +430%(*)
-#
-# (*) 64-bit code in 32-bit application context, which actually is
-#     on the TODO list. It should be noted that for safe deployment in
-#     a 32-bit *multi-threaded* context asynchronous signals should be
-#     blocked upon entry to the SHA512 block routine. This is because
-#     the 32-bit signaling procedure invalidates upper halves of GPRs.
-#     The context switch procedure preserves them, but signaling does not:-(
-
-# The second version is truly multi-thread safe. The trouble with the
-# original version was that it used the thread-local storage pointer
-# register. Well, it scrupulously preserved it, but the problem would
-# arise the moment an asynchronous signal was delivered and the signal
-# handler dereferenced the TLS pointer. While this is never the case in
-# the openssl application or test suite, we have to respect this
-# scenario and not use the TLS pointer register. The alternative would
-# be to require the caller to block signals prior to calling this
-# routine. For the record, in 32-bit context R2 serves as the TLS
-# pointer, while in 64-bit context it is R13.
-
-$flavour=shift;
-$output =shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T=8;
- $LRSAVE=2*$SIZE_T;
- $STU="stdu";
- $UCMP="cmpld";
- $SHL="sldi";
- $POP="ld";
- $PUSH="std";
-} elsif ($flavour =~ /32/) {
- $SIZE_T=4;
- $LRSAVE=$SIZE_T;
- $STU="stwu";
- $UCMP="cmplw";
- $SHL="slwi";
- $POP="lwz";
- $PUSH="stw";
-} else { die "nonsense $flavour"; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
-
-if ($output =~ /512/) {
- $func="sha512_block_data_order";
- $SZ=8;
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=(1, 8, 7);
- @sigma1=(19,61, 6);
- $rounds=80;
- $LD="ld";
- $ST="std";
- $ROR="rotrdi";
- $SHR="srdi";
-} else {
- $func="sha256_block_data_order";
- $SZ=4;
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 7,18, 3);
- @sigma1=(17,19,10);
- $rounds=64;
- $LD="lwz";
- $ST="stw";
- $ROR="rotrwi";
- $SHR="srwi";
-}
-
-$FRAME=32*$SIZE_T+16*$SZ;
-$LOCALS=6*$SIZE_T;
-
-$sp ="r1";
-$toc="r2";
-$ctx="r3"; # zapped by $a0
-$inp="r4"; # zapped by $a1
-$num="r5"; # zapped by $t0
-
-$T ="r0";
-$a0 ="r3";
-$a1 ="r4";
-$t0 ="r5";
-$t1 ="r6";
-$Tbl="r7";
-
-$A ="r8";
-$B ="r9";
-$C ="r10";
-$D ="r11";
-$E ="r12";
-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
-$G ="r14";
-$H ="r15";
-
-@V=($A,$B,$C,$D,$E,$F,$G,$H);
-@X=("r16","r17","r18","r19","r20","r21","r22","r23",
- "r24","r25","r26","r27","r28","r29","r30","r31");
-
-$inp="r31"; # reassigned $inp! aliases with @X[15]
-
-sub ROUND_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-$code.=<<___;
- $LD $T,`$i*$SZ`($Tbl)
- $ROR $a0,$e,$Sigma1[0]
- $ROR $a1,$e,$Sigma1[1]
- and $t0,$f,$e
- andc $t1,$g,$e
- add $T,$T,$h
- xor $a0,$a0,$a1
- $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
- or $t0,$t0,$t1 ; Ch(e,f,g)
- add $T,$T,@X[$i]
- xor $a0,$a0,$a1 ; Sigma1(e)
- add $T,$T,$t0
- add $T,$T,$a0
-
- $ROR $a0,$a,$Sigma0[0]
- $ROR $a1,$a,$Sigma0[1]
- and $t0,$a,$b
- and $t1,$a,$c
- xor $a0,$a0,$a1
- $ROR $a1,$a1,`$Sigma0[2]-$Sigma0[1]`
- xor $t0,$t0,$t1
- and $t1,$b,$c
- xor $a0,$a0,$a1 ; Sigma0(a)
- add $d,$d,$T
- xor $t0,$t0,$t1 ; Maj(a,b,c)
- add $h,$T,$a0
- add $h,$h,$t0
-
-___
-}
-
-sub ROUND_16_xx {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-$i-=16;
-$code.=<<___;
- $ROR $a0,@X[($i+1)%16],$sigma0[0]
- $ROR $a1,@X[($i+1)%16],$sigma0[1]
- $ROR $t0,@X[($i+14)%16],$sigma1[0]
- $ROR $t1,@X[($i+14)%16],$sigma1[1]
- xor $a0,$a0,$a1
- $SHR $a1,@X[($i+1)%16],$sigma0[2]
- xor $t0,$t0,$t1
- $SHR $t1,@X[($i+14)%16],$sigma1[2]
- add @X[$i],@X[$i],@X[($i+9)%16]
- xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f])
- xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f])
- add @X[$i],@X[$i],$a0
- add @X[$i],@X[$i],$t0
-___
-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
-}
-
-$code=<<___;
-.machine "any"
-.text
-
-.globl $func
-.align 6
-$func:
- $STU $sp,-$FRAME($sp)
- mflr r0
- $SHL $num,$num,`log(16*$SZ)/log(2)`
-
- $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
-
- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
- $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
- $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
- $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
- $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
- $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
- $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
- $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
- $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
- $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
- $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
- $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
- $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
- $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
- $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
- $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
- $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
- $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
- $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
- $PUSH r0,`$FRAME+$LRSAVE`($sp)
-
- $LD $A,`0*$SZ`($ctx)
- mr $inp,r4 ; incarnate $inp
- $LD $B,`1*$SZ`($ctx)
- $LD $C,`2*$SZ`($ctx)
- $LD $D,`3*$SZ`($ctx)
- $LD $E,`4*$SZ`($ctx)
- $LD $F,`5*$SZ`($ctx)
- $LD $G,`6*$SZ`($ctx)
- $LD $H,`7*$SZ`($ctx)
-
- bl LPICmeup
-LPICedup:
- andi. r0,$inp,3
- bne Lunaligned
-Laligned:
- add $num,$inp,$num
- $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
- $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
- bl Lsha2_block_private
- b Ldone
-
-; The PowerPC specification allows an implementation to be ill-behaved
-; upon unaligned access which crosses a page boundary. The "better safe
-; than sorry" principle makes me treat it specially. But I don't look
-; for the particular offending word; rather, I look for the input block
-; which crosses the boundary. Once found, that block is aligned and
-; hashed separately...
-.align 4
-Lunaligned:
- subfic $t1,$inp,4096
- andi. $t1,$t1,`4096-16*$SZ` ; distance to closest page boundary
- beq Lcross_page
- $UCMP $num,$t1
- ble- Laligned ; didn't cross the page boundary
- subfc $num,$t1,$num
- add $t1,$inp,$t1
- $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real remaining num
- $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; intermediate end pointer
- $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
- bl Lsha2_block_private
- ; $inp equals the intermediate end pointer here
- $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real remaining num
-Lcross_page:
- li $t1,`16*$SZ/4`
- mtctr $t1
- addi r20,$sp,$LOCALS ; aligned spot below the frame
-Lmemcpy:
- lbz r16,0($inp)
- lbz r17,1($inp)
- lbz r18,2($inp)
- lbz r19,3($inp)
- addi $inp,$inp,4
- stb r16,0(r20)
- stb r17,1(r20)
- stb r18,2(r20)
- stb r19,3(r20)
- addi r20,r20,4
- bdnz Lmemcpy
-
- $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
- addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
- addi $inp,$sp,$LOCALS ; fictitious inp pointer
- $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
- $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
- $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
- bl Lsha2_block_private
- $POP $inp,`$FRAME-$SIZE_T*26`($sp) ; restore real inp
- $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
- addic. $num,$num,`-16*$SZ` ; num--
- bne- Lunaligned
-
-Ldone:
- $POP r0,`$FRAME+$LRSAVE`($sp)
- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
- $POP r13,`$FRAME-$SIZE_T*19`($sp)
- $POP r14,`$FRAME-$SIZE_T*18`($sp)
- $POP r15,`$FRAME-$SIZE_T*17`($sp)
- $POP r16,`$FRAME-$SIZE_T*16`($sp)
- $POP r17,`$FRAME-$SIZE_T*15`($sp)
- $POP r18,`$FRAME-$SIZE_T*14`($sp)
- $POP r19,`$FRAME-$SIZE_T*13`($sp)
- $POP r20,`$FRAME-$SIZE_T*12`($sp)
- $POP r21,`$FRAME-$SIZE_T*11`($sp)
- $POP r22,`$FRAME-$SIZE_T*10`($sp)
- $POP r23,`$FRAME-$SIZE_T*9`($sp)
- $POP r24,`$FRAME-$SIZE_T*8`($sp)
- $POP r25,`$FRAME-$SIZE_T*7`($sp)
- $POP r26,`$FRAME-$SIZE_T*6`($sp)
- $POP r27,`$FRAME-$SIZE_T*5`($sp)
- $POP r28,`$FRAME-$SIZE_T*4`($sp)
- $POP r29,`$FRAME-$SIZE_T*3`($sp)
- $POP r30,`$FRAME-$SIZE_T*2`($sp)
- $POP r31,`$FRAME-$SIZE_T*1`($sp)
- mtlr r0
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,18,3,0
- .long 0
-
-.align 4
-Lsha2_block_private:
-___
-for($i=0;$i<16;$i++) {
-$code.=<<___ if ($SZ==4);
- lwz @X[$i],`$i*$SZ`($inp)
-___
-# 64-bit loads are split into 2x32-bit ones, as the CPU can't handle
-# unaligned 64-bit loads, only 32-bit ones...
-$code.=<<___ if ($SZ==8);
- lwz $t0,`$i*$SZ`($inp)
- lwz @X[$i],`$i*$SZ+4`($inp)
- insrdi @X[$i],$t0,32,0
-___
- &ROUND_00_15($i,@V);
- unshift(@V,pop(@V));
-}
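
So on the 64-bit path each schedule word is assembled from two 32-bit lwz
loads, with insrdi depositing the first word into the most significant half.
What insrdi @X[$i],$t0,32,0 leaves behind, in Perl terms (a sketch, not part
of the module):

    my $hi = $t0;         # lwz $t0,`$i*$SZ`($inp)
    my $lo = $X[$i];      # lwz @X[$i],`$i*$SZ+4`($inp)
    $X[$i] = ($hi << 32) | ($lo & 0xffffffff);   # insrdi @X[$i],$t0,32,0
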
-$code.=<<___;
- li $T,`$rounds/16-1`
- mtctr $T
-.align 4
-Lrounds:
- addi $Tbl,$Tbl,`16*$SZ`
-___
-for(;$i<32;$i++) {
- &ROUND_16_xx($i,@V);
- unshift(@V,pop(@V));
-}
-$code.=<<___;
- bdnz- Lrounds
-
- $POP $ctx,`$FRAME-$SIZE_T*22`($sp)
- $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
- $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
- subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
-
- $LD r16,`0*$SZ`($ctx)
- $LD r17,`1*$SZ`($ctx)
- $LD r18,`2*$SZ`($ctx)
- $LD r19,`3*$SZ`($ctx)
- $LD r20,`4*$SZ`($ctx)
- $LD r21,`5*$SZ`($ctx)
- $LD r22,`6*$SZ`($ctx)
- addi $inp,$inp,`16*$SZ` ; advance inp
- $LD r23,`7*$SZ`($ctx)
- add $A,$A,r16
- add $B,$B,r17
- $PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
- add $C,$C,r18
- $ST $A,`0*$SZ`($ctx)
- add $D,$D,r19
- $ST $B,`1*$SZ`($ctx)
- add $E,$E,r20
- $ST $C,`2*$SZ`($ctx)
- add $F,$F,r21
- $ST $D,`3*$SZ`($ctx)
- add $G,$G,r22
- $ST $E,`4*$SZ`($ctx)
- add $H,$H,r23
- $ST $F,`5*$SZ`($ctx)
- $ST $G,`6*$SZ`($ctx)
- $UCMP $inp,$num
- $ST $H,`7*$SZ`($ctx)
- bne Lsha2_block_private
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-
-# Ugly hack here, because PPC assembler syntax seems to vary too
-# much from platform to platform...
-$code.=<<___;
-.align 6
-LPICmeup:
- mflr r0
- bcl 20,31,\$+4
- mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
- addi $Tbl,$Tbl,`64-8`
- mtlr r0
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
- .space `64-9*4`
-___
-$code.=<<___ if ($SZ==8);
- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
- .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
-___
-$code.=<<___ if ($SZ==4);
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
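
[Editorial note on the split-load path in the deleted sha512-ppc.pl above: in the $SZ==8 branch, two lwz loads plus insrdi merge two aligned 32-bit words into one 64-bit message word. A minimal Perl sketch of that merge, assuming a Perl build with 64-bit integers; merge64, $hi and $lo are illustrative names, not from the file.]

    use strict;
    use warnings;

    # "insrdi @X[$i],$t0,32,0" inserts $t0 into the top 32 bits of @X[$i]:
    # $t0 held the big-endian high word, @X[$i] the low word.
    sub merge64 {
        my ($hi, $lo) = @_;
        return ($hi << 32) | $lo;
    }

    printf "%016x\n", merge64(0x428a2f98, 0xd728ae22);   # first K512 entry
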
diff --git a/app/openssl/crypto/sha/asm/sha512-s390x.pl b/app/openssl/crypto/sha/asm/sha512-s390x.pl
deleted file mode 100644
index 079a3fc7..00000000
--- a/app/openssl/crypto/sha/asm/sha512-s390x.pl
+++ /dev/null
@@ -1,322 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA256/512 block procedures for s390x.
-
-# April 2007.
-#
-# sha256_block_data_order is reportedly >3 times faster than gcc 3.3
-# generated code (this must be a compiler bug, as the improvement is
-# "pathologically" high, in particular in comparison to other SHA
-# modules). But the real twist is that it detects whether hardware
-# support for SHA256 is available and, in that case, utilizes it. Then
-# performance can reach >6.5x that of the assembler path for larger chunks.
-#
-# sha512_block_data_order is ~70% faster than gcc 3.3 generated code.
-
-# January 2009.
-#
-# Add support for hardware SHA512 and reschedule instructions to
-# favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster
-# than software.
-
-# November 2010.
-#
-# Adapt for -m31 build. If the kernel supports what's called the
-# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to use
-# 64-bit instructions and achieve "64-bit" performance even in a 31-bit
-# legacy application context. The feature is not specific to any
-# particular processor, as long as it's a "z-CPU"; the latter implies
-# that the code remains z/Architecture specific. On z900, SHA256 was
-# measured to perform 2.4x and SHA512 13x better than code generated by
-# gcc 4.3.
-
-$flavour = shift;
-
-if ($flavour =~ /3[12]/) {
- $SIZE_T=4;
- $g="";
-} else {
- $SIZE_T=8;
- $g="g";
-}
-
-$t0="%r0";
-$t1="%r1";
-$ctx="%r2"; $t2="%r2";
-$inp="%r3";
-$len="%r4"; # used as index in inner loop
-
-$A="%r5";
-$B="%r6";
-$C="%r7";
-$D="%r8";
-$E="%r9";
-$F="%r10";
-$G="%r11";
-$H="%r12"; @V=($A,$B,$C,$D,$E,$F,$G,$H);
-$tbl="%r13";
-$T1="%r14";
-$sp="%r15";
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-if ($output =~ /512/) {
- $label="512";
- $SZ=8;
- $LD="lg"; # load from memory
- $ST="stg"; # store to memory
- $ADD="alg"; # add with memory operand
- $ROT="rllg"; # rotate left
-	$SHR="srlg";	# logical right shift [see the fixup at the end]
- @Sigma0=(25,30,36);
- @Sigma1=(23,46,50);
- @sigma0=(56,63, 7);
- @sigma1=( 3,45, 6);
- $rounds=80;
- $kimdfunc=3; # 0 means unknown/unsupported/unimplemented/disabled
-} else {
- $label="256";
- $SZ=4;
- $LD="llgf"; # load from memory
- $ST="st"; # store to memory
- $ADD="al"; # add with memory operand
- $ROT="rll"; # rotate left
- $SHR="srl"; # logical right shift
- @Sigma0=(10,19,30);
- @Sigma1=( 7,21,26);
- @sigma0=(14,25, 3);
- @sigma1=(13,15,10);
- $rounds=64;
- $kimdfunc=2; # magic function code for kimd instruction
-}
-$Func="sha${label}_block_data_order";
-$Table="K${label}";
-$stdframe=16*$SIZE_T+4*8;
-$frame=$stdframe+16*$SZ;
-
-sub BODY_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___ if ($i<16);
- $LD $T1,`$i*$SZ`($inp) ### $i
-___
-$code.=<<___;
- $ROT $t0,$e,$Sigma1[0]
- $ROT $t1,$e,$Sigma1[1]
- lgr $t2,$f
- xgr $t0,$t1
- $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]`
- xgr $t2,$g
- $ST $T1,`$stdframe+$SZ*($i%16)`($sp)
- xgr $t0,$t1 # Sigma1(e)
- algr $T1,$h # T1+=h
- ngr $t2,$e
- lgr $t1,$a
- algr $T1,$t0 # T1+=Sigma1(e)
- $ROT $h,$a,$Sigma0[0]
- xgr $t2,$g # Ch(e,f,g)
- $ADD $T1,`$i*$SZ`($len,$tbl) # T1+=K[i]
- $ROT $t0,$a,$Sigma0[1]
- algr $T1,$t2 # T1+=Ch(e,f,g)
- ogr $t1,$b
- xgr $h,$t0
- lgr $t2,$a
- ngr $t1,$c
- $ROT $t0,$t0,`$Sigma0[2]-$Sigma0[1]`
- xgr $h,$t0 # h=Sigma0(a)
- ngr $t2,$b
- algr $h,$T1 # h+=T1
- ogr $t2,$t1 # Maj(a,b,c)
- algr $d,$T1 # d+=T1
- algr $h,$t2 # h+=Maj(a,b,c)
-___
-}
-
-sub BODY_16_XX {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___;
- $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i
- $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp)
- $ROT $t0,$T1,$sigma0[0]
- $SHR $T1,$sigma0[2]
- $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]`
- xgr $T1,$t0
- $ROT $t0,$t1,$sigma1[0]
- xgr $T1,$t2 # sigma0(X[i+1])
- $SHR $t1,$sigma1[2]
- $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i]
- xgr $t1,$t0
- $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]`
- $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9]
- xgr $t1,$t0 # sigma1(X[i+14])
- algr $T1,$t1 # +=sigma1(X[i+14])
-___
- &BODY_00_15(@_);
-}
-
-$code.=<<___;
-.text
-.align 64
-.type $Table,\@object
-$Table:
-___
-$code.=<<___ if ($SZ==4);
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-___
-$code.=<<___ if ($SZ==8);
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
-___
-$code.=<<___;
-.size $Table,.-$Table
-.globl $Func
-.type $Func,\@function
-$Func:
- sllg $len,$len,`log(16*$SZ)/log(2)`
-___
-$code.=<<___ if ($kimdfunc);
- larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lsoftware
- lghi %r0,0
- la %r1,`2*$SIZE_T`($sp)
- .long 0xb93e0002 # kimd %r0,%r2
- lg %r0,`2*$SIZE_T`($sp)
- tmhh %r0,`0x8000>>$kimdfunc`
- jz .Lsoftware
- lghi %r0,$kimdfunc
- lgr %r1,$ctx
- lgr %r2,$inp
- lgr %r3,$len
- .long 0xb93e0002 # kimd %r0,%r2
- brc 1,.-4 # pay attention to "partial completion"
- br %r14
-.align 16
-.Lsoftware:
-___
-$code.=<<___;
- lghi %r1,-$frame
- la $len,0($len,$inp)
- stm${g} $ctx,%r15,`2*$SIZE_T`($sp)
- lgr %r0,$sp
- la $sp,0(%r1,$sp)
- st${g} %r0,0($sp)
-
- larl $tbl,$Table
- $LD $A,`0*$SZ`($ctx)
- $LD $B,`1*$SZ`($ctx)
- $LD $C,`2*$SZ`($ctx)
- $LD $D,`3*$SZ`($ctx)
- $LD $E,`4*$SZ`($ctx)
- $LD $F,`5*$SZ`($ctx)
- $LD $G,`6*$SZ`($ctx)
- $LD $H,`7*$SZ`($ctx)
-
-.Lloop:
- lghi $len,0
-___
-for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
-$code.=".Lrounds_16_xx:\n";
-for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- aghi $len,`16*$SZ`
- lghi $t0,`($rounds-16)*$SZ`
- clgr $len,$t0
- jne .Lrounds_16_xx
-
- l${g} $ctx,`$frame+2*$SIZE_T`($sp)
- la $inp,`16*$SZ`($inp)
- $ADD $A,`0*$SZ`($ctx)
- $ADD $B,`1*$SZ`($ctx)
- $ADD $C,`2*$SZ`($ctx)
- $ADD $D,`3*$SZ`($ctx)
- $ADD $E,`4*$SZ`($ctx)
- $ADD $F,`5*$SZ`($ctx)
- $ADD $G,`6*$SZ`($ctx)
- $ADD $H,`7*$SZ`($ctx)
- $ST $A,`0*$SZ`($ctx)
- $ST $B,`1*$SZ`($ctx)
- $ST $C,`2*$SZ`($ctx)
- $ST $D,`3*$SZ`($ctx)
- $ST $E,`4*$SZ`($ctx)
- $ST $F,`5*$SZ`($ctx)
- $ST $G,`6*$SZ`($ctx)
- $ST $H,`7*$SZ`($ctx)
- cl${g} $inp,`$frame+4*$SIZE_T`($sp)
- jne .Lloop
-
- lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
- br %r14
-.size $Func,.-$Func
-.string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,16,8
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-# unlike the 32-bit shift, the 64-bit one takes three arguments
-$code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm;
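# Editorial illustration (not part of the original file): the substitution
# above rewrites the two-operand shift form emitted by the template into
# the three-operand z/Architecture form; $line is a made-up sample.
my $line = "\tsrlg\t%r0,7";
$line =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/;
print "$line\n";                      # now "\tsrlg\t%r0,%r0,7"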
-
-print $code;
-close STDOUT;
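
[Editorial note: the round code in the deleted sha512-s390x.pl above builds Sigma1(e) from left rotates (rllg by @Sigma1=(23,46,50) in the $SZ==8 case); these are the FIPS-180 right rotates by 41, 18 and 14, since a left rotate by n equals a right rotate by 64-n. A reference sketch in Perl, assuming 64-bit integers; ROTR64 and Sigma1 are illustrative names.]

    use strict;
    use warnings;

    use constant MASK64 => 0xffff_ffff_ffff_ffff;

    sub ROTR64 { my ($x, $n) = @_; (($x >> $n) | ($x << (64 - $n))) & MASK64 }

    # Sigma1(e) = ROTR(e,14) ^ ROTR(e,18) ^ ROTR(e,41)
    #           = ROTL(e,50) ^ ROTL(e,46) ^ ROTL(e,23)   [matches @Sigma1 above]
    sub Sigma1 { my $e = shift; ROTR64($e, 14) ^ ROTR64($e, 18) ^ ROTR64($e, 41) }
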
diff --git a/app/openssl/crypto/sha/asm/sha512-sparcv9.pl b/app/openssl/crypto/sha/asm/sha512-sparcv9.pl
deleted file mode 100644
index 58574078..00000000
--- a/app/openssl/crypto/sha/asm/sha512-sparcv9.pl
+++ /dev/null
@@ -1,594 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA256 performance improvement over compiler-generated code varies
-# from 40% for Sun C [32-bit build] to 70% for gcc [3.3, 64-bit
-# build]. Just like in the SHA1 module, I aim to ensure scalability on
-# UltraSPARC T1 by packing X[16] into 8 64-bit registers.
-
-# SHA512 on pre-T1 UltraSPARC.
-#
-# Performance is >75% better than 64-bit code generated by Sun C and
-# over 2x better than 32-bit code. X[16] resides on the stack, but
-# access to it is scheduled for L2 latency and staged through the 32
-# least significant bits of %l0-%l7. The latter is done to achieve
-# 32-/64-bit ABI duality. Nevertheless it's ~40% faster than SHA256,
-# which is pretty good [the optimal coefficient is 50%].
-#
-# SHA512 on UltraSPARC T1.
-#
-# It's not any faster than 64-bit code generated by Sun C 5.8. This is
-# because the 64-bit code generator has the advantage of using 64-bit
-# loads(*) to access X[16], which I consciously traded for 32-/64-bit
-# ABI duality [as per above]. But it surpasses 32-bit Sun C generated
-# code by 60%, not to mention that it doesn't suffer severe decay when
-# running four times as many threads as there are physical cores, and
-# that it leaves gcc [3.4] behind by over a 4x factor! Compared to
-# SHA256, single-thread performance is only 10% better, but overall
-# throughput at the maximum number of threads for a given CPU exceeds
-# that of SHA256 by 30% [again, the optimal coefficient is 50%].
-#
-# (*)	Unlike on pre-T1 UltraSPARC, loads on T1 are executed strictly
-#	in-order, i.e. a load instruction has to complete before the
-#	next instruction in the given thread executes, even if the
-#	latter does not depend on the load result! This means that on
-#	T1 two 32-bit loads are always slower than one 64-bit load.
-#	Once again this is unlike pre-T1 UltraSPARC, where, if scheduled
-#	appropriately, 2x32-bit loads can be as fast as a 1x64-bit one.
-
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
-
-$output=shift;
-open STDOUT,">$output";
-
-if ($output =~ /512/) {
- $label="512";
- $SZ=8;
- $LD="ldx"; # load from memory
- $ST="stx"; # store to memory
- $SLL="sllx"; # shift left logical
- $SRL="srlx"; # shift right logical
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=( 7, 1, 8); # right shift first
- @sigma1=( 6,19,61); # right shift first
- $lastK=0x817;
- $rounds=80;
- $align=4;
-
- $locals=16*$SZ; # X[16]
-
- $A="%o0";
- $B="%o1";
- $C="%o2";
- $D="%o3";
- $E="%o4";
- $F="%o5";
- $G="%g1";
- $H="%o7";
- @V=($A,$B,$C,$D,$E,$F,$G,$H);
-} else {
- $label="256";
- $SZ=4;
- $LD="ld"; # load from memory
- $ST="st"; # store to memory
- $SLL="sll"; # shift left logical
- $SRL="srl"; # shift right logical
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 3, 7,18); # right shift first
- @sigma1=(10,17,19); # right shift first
- $lastK=0x8f2;
- $rounds=64;
- $align=8;
-
- $locals=0; # X[16] is register resident
- @X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7");
-
- $A="%l0";
- $B="%l1";
- $C="%l2";
- $D="%l3";
- $E="%l4";
- $F="%l5";
- $G="%l6";
- $H="%l7";
- @V=($A,$B,$C,$D,$E,$F,$G,$H);
-}
-$T1="%g2";
-$tmp0="%g3";
-$tmp1="%g4";
-$tmp2="%g5";
-
-$ctx="%i0";
-$inp="%i1";
-$len="%i2";
-$Ktbl="%i3";
-$tmp31="%i4";
-$tmp32="%i5";
-
-########### SHA256
-$Xload = sub {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-
- if ($i==0) {
-$code.=<<___;
- ldx [$inp+0],@X[0]
- ldx [$inp+16],@X[2]
- ldx [$inp+32],@X[4]
- ldx [$inp+48],@X[6]
- ldx [$inp+8],@X[1]
- ldx [$inp+24],@X[3]
-	subcc	%g0,$tmp31,$tmp32 ! should be 64-$tmp31, but -$tmp31 works too (shift counts are mod 64)
- ldx [$inp+40],@X[5]
- bz,pt %icc,.Laligned
- ldx [$inp+56],@X[7]
-
- sllx @X[0],$tmp31,@X[0]
- ldx [$inp+64],$T1
-___
-for($j=0;$j<7;$j++)
-{ $code.=<<___;
- srlx @X[$j+1],$tmp32,$tmp1
- sllx @X[$j+1],$tmp31,@X[$j+1]
- or $tmp1,@X[$j],@X[$j]
-___
-}
-$code.=<<___;
- srlx $T1,$tmp32,$T1
- or $T1,@X[7],@X[7]
-.Laligned:
-___
- }
-
- if ($i&1) {
- $code.="\tadd @X[$i/2],$h,$T1\n";
- } else {
- $code.="\tsrlx @X[$i/2],32,$T1\n\tadd $h,$T1,$T1\n";
- }
-} if ($SZ==4);
-
-########### SHA512
-$Xload = sub {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1),"%l".eval((($i+1)*2)%8));
-
-$code.=<<___ if ($i==0);
- ld [$inp+0],%l0
- ld [$inp+4],%l1
- ld [$inp+8],%l2
- ld [$inp+12],%l3
- ld [$inp+16],%l4
- ld [$inp+20],%l5
- ld [$inp+24],%l6
- ld [$inp+28],%l7
-___
-$code.=<<___ if ($i<15);
- sllx @pair[1],$tmp31,$tmp2 ! Xload($i)
- add $tmp31,32,$tmp0
- sllx @pair[0],$tmp0,$tmp1
- `"ld [$inp+".eval(32+0+$i*8)."],@pair[0]" if ($i<12)`
- srlx @pair[2],$tmp32,@pair[1]
- or $tmp1,$tmp2,$tmp2
- or @pair[1],$tmp2,$tmp2
- `"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)`
- add $h,$tmp2,$T1
- $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
-___
-$code.=<<___ if ($i==12);
- brnz,a $tmp31,.+8
- ld [$inp+128],%l0
-___
-$code.=<<___ if ($i==15);
- ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
- sllx @pair[1],$tmp31,$tmp2 ! Xload($i)
- add $tmp31,32,$tmp0
- ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
- sllx @pair[0],$tmp0,$tmp1
- ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
- srlx @pair[2],$tmp32,@pair[1]
- or $tmp1,$tmp2,$tmp2
- ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
- or @pair[1],$tmp2,$tmp2
- ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
- add $h,$tmp2,$T1
- $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
- ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
- ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
- ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
-___
-} if ($SZ==8);
-
-########### common
-sub BODY_00_15 {
-my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-
- if ($i<16) {
- &$Xload(@_);
- } else {
- $code.="\tadd $h,$T1,$T1\n";
- }
-
-$code.=<<___;
- $SRL $e,@Sigma1[0],$h !! $i
- xor $f,$g,$tmp2
- $SLL $e,`$SZ*8-@Sigma1[2]`,$tmp1
- and $e,$tmp2,$tmp2
- $SRL $e,@Sigma1[1],$tmp0
- xor $tmp1,$h,$h
- $SLL $e,`$SZ*8-@Sigma1[1]`,$tmp1
- xor $tmp0,$h,$h
- $SRL $e,@Sigma1[2],$tmp0
- xor $tmp1,$h,$h
- $SLL $e,`$SZ*8-@Sigma1[0]`,$tmp1
- xor $tmp0,$h,$h
- xor $g,$tmp2,$tmp2 ! Ch(e,f,g)
- xor $tmp1,$h,$tmp0 ! Sigma1(e)
-
- $SRL $a,@Sigma0[0],$h
- add $tmp2,$T1,$T1
- $LD [$Ktbl+`$i*$SZ`],$tmp2 ! K[$i]
- $SLL $a,`$SZ*8-@Sigma0[2]`,$tmp1
- add $tmp0,$T1,$T1
- $SRL $a,@Sigma0[1],$tmp0
- xor $tmp1,$h,$h
- $SLL $a,`$SZ*8-@Sigma0[1]`,$tmp1
- xor $tmp0,$h,$h
- $SRL $a,@Sigma0[2],$tmp0
- xor $tmp1,$h,$h
- $SLL $a,`$SZ*8-@Sigma0[0]`,$tmp1
- xor $tmp0,$h,$h
- xor $tmp1,$h,$h ! Sigma0(a)
-
- or $a,$b,$tmp0
- and $a,$b,$tmp1
- and $c,$tmp0,$tmp0
- or $tmp0,$tmp1,$tmp1 ! Maj(a,b,c)
- add $tmp2,$T1,$T1 ! +=K[$i]
- add $tmp1,$h,$h
-
- add $T1,$d,$d
- add $T1,$h,$h
-___
-}
-
-########### SHA256
-$BODY_16_XX = sub {
-my $i=@_[0];
-my $xi;
-
- if ($i&1) {
- $xi=$tmp32;
- $code.="\tsrlx @X[(($i+1)/2)%8],32,$xi\n";
- } else {
- $xi=@X[(($i+1)/2)%8];
- }
-$code.=<<___;
- srl $xi,@sigma0[0],$T1 !! Xupdate($i)
- sll $xi,`32-@sigma0[2]`,$tmp1
- srl $xi,@sigma0[1],$tmp0
- xor $tmp1,$T1,$T1
- sll $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
- xor $tmp0,$T1,$T1
- srl $xi,@sigma0[2],$tmp0
- xor $tmp1,$T1,$T1
-___
- if ($i&1) {
- $xi=@X[(($i+14)/2)%8];
- } else {
- $xi=$tmp32;
- $code.="\tsrlx @X[(($i+14)/2)%8],32,$xi\n";
- }
-$code.=<<___;
- srl $xi,@sigma1[0],$tmp2
- xor $tmp0,$T1,$T1 ! T1=sigma0(X[i+1])
- sll $xi,`32-@sigma1[2]`,$tmp1
- srl $xi,@sigma1[1],$tmp0
- xor $tmp1,$tmp2,$tmp2
- sll $tmp1,`@sigma1[2]-@sigma1[1]`,$tmp1
- xor $tmp0,$tmp2,$tmp2
- srl $xi,@sigma1[2],$tmp0
- xor $tmp1,$tmp2,$tmp2
-___
- if ($i&1) {
- $xi=@X[($i/2)%8];
-$code.=<<___;
- srlx @X[(($i+9)/2)%8],32,$tmp1 ! X[i+9]
- xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14])
- srl @X[($i/2)%8],0,$tmp0
- add $tmp2,$tmp1,$tmp1
- add $xi,$T1,$T1 ! +=X[i]
- xor $tmp0,@X[($i/2)%8],@X[($i/2)%8]
- add $tmp1,$T1,$T1
-
- srl $T1,0,$T1
- or $T1,@X[($i/2)%8],@X[($i/2)%8]
-___
- } else {
- $xi=@X[(($i+9)/2)%8];
-$code.=<<___;
- srlx @X[($i/2)%8],32,$tmp1 ! X[i]
- xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14])
- add $xi,$T1,$T1 ! +=X[i+9]
- add $tmp2,$tmp1,$tmp1
- srl @X[($i/2)%8],0,@X[($i/2)%8]
- add $tmp1,$T1,$T1
-
- sllx $T1,32,$tmp0
- or $tmp0,@X[($i/2)%8],@X[($i/2)%8]
-___
- }
- &BODY_00_15(@_);
-} if ($SZ==4);
-
-########### SHA512
-$BODY_16_XX = sub {
-my $i=@_[0];
-my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1));
-
-$code.=<<___;
- sllx %l2,32,$tmp0 !! Xupdate($i)
- or %l3,$tmp0,$tmp0
-
- srlx $tmp0,@sigma0[0],$T1
- ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
- sllx $tmp0,`64-@sigma0[2]`,$tmp1
- ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
- srlx $tmp0,@sigma0[1],$tmp0
- xor $tmp1,$T1,$T1
- sllx $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
- xor $tmp0,$T1,$T1
- srlx $tmp0,`@sigma0[2]-@sigma0[1]`,$tmp0
- xor $tmp1,$T1,$T1
- sllx %l6,32,$tmp2
- xor $tmp0,$T1,$T1 ! sigma0(X[$i+1])
- or %l7,$tmp2,$tmp2
-
- srlx $tmp2,@sigma1[0],$tmp1
- ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
- sllx $tmp2,`64-@sigma1[2]`,$tmp0
- ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
- srlx $tmp2,@sigma1[1],$tmp2
- xor $tmp0,$tmp1,$tmp1
- sllx $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
- xor $tmp2,$tmp1,$tmp1
- srlx $tmp2,`@sigma1[2]-@sigma1[1]`,$tmp2
- xor $tmp0,$tmp1,$tmp1
- sllx %l4,32,$tmp0
- xor $tmp2,$tmp1,$tmp1 ! sigma1(X[$i+14])
- ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
- or %l5,$tmp0,$tmp0
- ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
-
- sllx %l0,32,$tmp2
- add $tmp1,$T1,$T1
- ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
- or %l1,$tmp2,$tmp2
- add $tmp0,$T1,$T1 ! +=X[$i+9]
- ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
- add $tmp2,$T1,$T1 ! +=X[$i]
- $ST $T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
-___
- &BODY_00_15(@_);
-} if ($SZ==8);
-
-$code.=<<___ if ($bits==64);
-.register %g2,#scratch
-.register %g3,#scratch
-___
-$code.=<<___;
-.section ".text",#alloc,#execinstr
-
-.align 64
-K${label}:
-.type K${label},#object
-___
-if ($SZ==4) {
-$code.=<<___;
- .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
- .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
- .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
- .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
- .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
- .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
- .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
- .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
- .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
- .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
- .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
- .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
- .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
- .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
- .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
- .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-___
-} else {
-$code.=<<___;
- .long 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
- .long 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
- .long 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
- .long 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
- .long 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
- .long 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
- .long 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
- .long 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
- .long 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
- .long 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
- .long 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
- .long 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
- .long 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
- .long 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
- .long 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
- .long 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
- .long 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
- .long 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
- .long 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
- .long 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
- .long 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
- .long 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
- .long 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
- .long 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
- .long 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
- .long 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
- .long 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
- .long 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
- .long 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
- .long 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
- .long 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
- .long 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
- .long 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
- .long 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
- .long 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
- .long 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
- .long 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
- .long 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
- .long 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
- .long 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
-___
-}
-$code.=<<___;
-.size K${label},.-K${label}
-.globl sha${label}_block_data_order
-sha${label}_block_data_order:
- save %sp,`-$frame-$locals`,%sp
- and $inp,`$align-1`,$tmp31
- sllx $len,`log(16*$SZ)/log(2)`,$len
- andn $inp,`$align-1`,$inp
- sll $tmp31,3,$tmp31
- add $inp,$len,$len
-___
-$code.=<<___ if ($SZ==8); # SHA512
- mov 32,$tmp32
- sub $tmp32,$tmp31,$tmp32
-___
-$code.=<<___;
-.Lpic: call .+8
- add %o7,K${label}-.Lpic,$Ktbl
-
- $LD [$ctx+`0*$SZ`],$A
- $LD [$ctx+`1*$SZ`],$B
- $LD [$ctx+`2*$SZ`],$C
- $LD [$ctx+`3*$SZ`],$D
- $LD [$ctx+`4*$SZ`],$E
- $LD [$ctx+`5*$SZ`],$F
- $LD [$ctx+`6*$SZ`],$G
- $LD [$ctx+`7*$SZ`],$H
-
-.Lloop:
-___
-for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
-$code.=".L16_xx:\n";
-for (;$i<32;$i++) { &$BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- and $tmp2,0xfff,$tmp2
- cmp $tmp2,$lastK
- bne .L16_xx
- add $Ktbl,`16*$SZ`,$Ktbl ! Ktbl+=16
-
-___
-$code.=<<___ if ($SZ==4); # SHA256
- $LD [$ctx+`0*$SZ`],@X[0]
- $LD [$ctx+`1*$SZ`],@X[1]
- $LD [$ctx+`2*$SZ`],@X[2]
- $LD [$ctx+`3*$SZ`],@X[3]
- $LD [$ctx+`4*$SZ`],@X[4]
- $LD [$ctx+`5*$SZ`],@X[5]
- $LD [$ctx+`6*$SZ`],@X[6]
- $LD [$ctx+`7*$SZ`],@X[7]
-
- add $A,@X[0],$A
- $ST $A,[$ctx+`0*$SZ`]
- add $B,@X[1],$B
- $ST $B,[$ctx+`1*$SZ`]
- add $C,@X[2],$C
- $ST $C,[$ctx+`2*$SZ`]
- add $D,@X[3],$D
- $ST $D,[$ctx+`3*$SZ`]
- add $E,@X[4],$E
- $ST $E,[$ctx+`4*$SZ`]
- add $F,@X[5],$F
- $ST $F,[$ctx+`5*$SZ`]
- add $G,@X[6],$G
- $ST $G,[$ctx+`6*$SZ`]
- add $H,@X[7],$H
- $ST $H,[$ctx+`7*$SZ`]
-___
-$code.=<<___ if ($SZ==8); # SHA512
- ld [$ctx+`0*$SZ+0`],%l0
- ld [$ctx+`0*$SZ+4`],%l1
- ld [$ctx+`1*$SZ+0`],%l2
- ld [$ctx+`1*$SZ+4`],%l3
- ld [$ctx+`2*$SZ+0`],%l4
- ld [$ctx+`2*$SZ+4`],%l5
- ld [$ctx+`3*$SZ+0`],%l6
-
- sllx %l0,32,$tmp0
- ld [$ctx+`3*$SZ+4`],%l7
- sllx %l2,32,$tmp1
- or %l1,$tmp0,$tmp0
- or %l3,$tmp1,$tmp1
- add $tmp0,$A,$A
- add $tmp1,$B,$B
- $ST $A,[$ctx+`0*$SZ`]
- sllx %l4,32,$tmp2
- $ST $B,[$ctx+`1*$SZ`]
- sllx %l6,32,$T1
- or %l5,$tmp2,$tmp2
- or %l7,$T1,$T1
- add $tmp2,$C,$C
- $ST $C,[$ctx+`2*$SZ`]
- add $T1,$D,$D
- $ST $D,[$ctx+`3*$SZ`]
-
- ld [$ctx+`4*$SZ+0`],%l0
- ld [$ctx+`4*$SZ+4`],%l1
- ld [$ctx+`5*$SZ+0`],%l2
- ld [$ctx+`5*$SZ+4`],%l3
- ld [$ctx+`6*$SZ+0`],%l4
- ld [$ctx+`6*$SZ+4`],%l5
- ld [$ctx+`7*$SZ+0`],%l6
-
- sllx %l0,32,$tmp0
- ld [$ctx+`7*$SZ+4`],%l7
- sllx %l2,32,$tmp1
- or %l1,$tmp0,$tmp0
- or %l3,$tmp1,$tmp1
- add $tmp0,$E,$E
- add $tmp1,$F,$F
- $ST $E,[$ctx+`4*$SZ`]
- sllx %l4,32,$tmp2
- $ST $F,[$ctx+`5*$SZ`]
- sllx %l6,32,$T1
- or %l5,$tmp2,$tmp2
- or %l7,$T1,$T1
- add $tmp2,$G,$G
- $ST $G,[$ctx+`6*$SZ`]
- add $T1,$H,$H
- $ST $H,[$ctx+`7*$SZ`]
-___
-$code.=<<___;
- add $inp,`16*$SZ`,$inp ! advance inp
- cmp $inp,$len
- bne `$bits==64?"%xcc":"%icc"`,.Lloop
- sub $Ktbl,`($rounds-16)*$SZ`,$Ktbl ! rewind Ktbl
-
- ret
- restore
-.type sha${label}_block_data_order,#function
-.size sha${label}_block_data_order,(.-sha${label}_block_data_order)
-.asciz "SHA${label} block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
-.align 4
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
-close STDOUT;
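
[Editorial note: the four-instruction Maj(a,b,c) sequence in the deleted sha512-sparcv9.pl above (or/and/and/or on $tmp0/$tmp1) relies on the identity Maj(a,b,c) = ((a|b)&c) | (a&b). A quick Perl check of that identity against the textbook majority form; the sub names are illustrative.]

    use strict;
    use warnings;

    sub maj_textbook { my ($a, $b, $c) = @_; ($a & $b) ^ ($a & $c) ^ ($b & $c) }
    sub maj_sparc    { my ($a, $b, $c) = @_; (($a | $b) & $c) | ($a & $b) }

    # Exhaustive single-bit check; the bitwise identity then holds for all words.
    for my $a (0, 1) { for my $b (0, 1) { for my $c (0, 1) {
        die "mismatch" if maj_textbook($a, $b, $c) != maj_sparc($a, $b, $c);
    }}}
    print "Maj identity holds\n";
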
diff --git a/app/openssl/crypto/sha/asm/sha512-x86_64.S b/app/openssl/crypto/sha/asm/sha512-x86_64.S
deleted file mode 100644
index 2d3294e0..00000000
--- a/app/openssl/crypto/sha/asm/sha512-x86_64.S
+++ /dev/null
@@ -1,1802 +0,0 @@
-.text
-
-.globl sha512_block_data_order
-.type sha512_block_data_order,@function
-.align 16
-sha512_block_data_order:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- movq %rsp,%r11
- shlq $4,%rdx
- subq $128+32,%rsp
- leaq (%rsi,%rdx,8),%rdx
- andq $-64,%rsp
- movq %rdi,128+0(%rsp)
- movq %rsi,128+8(%rsp)
- movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
-.Lprologue:
-
- leaq K512(%rip),%rbp
-
- movq 0(%rdi),%rax
- movq 8(%rdi),%rbx
- movq 16(%rdi),%rcx
- movq 24(%rdi),%rdx
- movq 32(%rdi),%r8
- movq 40(%rdi),%r9
- movq 48(%rdi),%r10
- movq 56(%rdi),%r11
- jmp .Lloop
-
-.align 16
-.Lloop:
- xorq %rdi,%rdi
- movq 0(%rsi),%r12
- movq %r8,%r13
- movq %rax,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r9,%r15
- movq %r12,0(%rsp)
-
- rorq $5,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- rorq $4,%r13
- addq %r11,%r12
- xorq %rax,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r8,%r15
- movq %rbx,%r11
-
- rorq $6,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- xorq %rcx,%r11
- xorq %rax,%r14
- addq %r15,%r12
- movq %rbx,%r15
-
- rorq $14,%r13
- andq %rax,%r11
- andq %rcx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r11
-
- addq %r12,%rdx
- addq %r12,%r11
- leaq 1(%rdi),%rdi
- addq %r14,%r11
-
- movq 8(%rsi),%r12
- movq %rdx,%r13
- movq %r11,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r8,%r15
- movq %r12,8(%rsp)
-
- rorq $5,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- rorq $4,%r13
- addq %r10,%r12
- xorq %r11,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rdx,%r15
- movq %rax,%r10
-
- rorq $6,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- xorq %rbx,%r10
- xorq %r11,%r14
- addq %r15,%r12
- movq %rax,%r15
-
- rorq $14,%r13
- andq %r11,%r10
- andq %rbx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r10
-
- addq %r12,%rcx
- addq %r12,%r10
- leaq 1(%rdi),%rdi
- addq %r14,%r10
-
- movq 16(%rsi),%r12
- movq %rcx,%r13
- movq %r10,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rdx,%r15
- movq %r12,16(%rsp)
-
- rorq $5,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- rorq $4,%r13
- addq %r9,%r12
- xorq %r10,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rcx,%r15
- movq %r11,%r9
-
- rorq $6,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- xorq %rax,%r9
- xorq %r10,%r14
- addq %r15,%r12
- movq %r11,%r15
-
- rorq $14,%r13
- andq %r10,%r9
- andq %rax,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r9
-
- addq %r12,%rbx
- addq %r12,%r9
- leaq 1(%rdi),%rdi
- addq %r14,%r9
-
- movq 24(%rsi),%r12
- movq %rbx,%r13
- movq %r9,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rcx,%r15
- movq %r12,24(%rsp)
-
- rorq $5,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- rorq $4,%r13
- addq %r8,%r12
- xorq %r9,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rbx,%r15
- movq %r10,%r8
-
- rorq $6,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- xorq %r11,%r8
- xorq %r9,%r14
- addq %r15,%r12
- movq %r10,%r15
-
- rorq $14,%r13
- andq %r9,%r8
- andq %r11,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r8
-
- addq %r12,%rax
- addq %r12,%r8
- leaq 1(%rdi),%rdi
- addq %r14,%r8
-
- movq 32(%rsi),%r12
- movq %rax,%r13
- movq %r8,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rbx,%r15
- movq %r12,32(%rsp)
-
- rorq $5,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- rorq $4,%r13
- addq %rdx,%r12
- xorq %r8,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rax,%r15
- movq %r9,%rdx
-
- rorq $6,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- xorq %r10,%rdx
- xorq %r8,%r14
- addq %r15,%r12
- movq %r9,%r15
-
- rorq $14,%r13
- andq %r8,%rdx
- andq %r10,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rdx
-
- addq %r12,%r11
- addq %r12,%rdx
- leaq 1(%rdi),%rdi
- addq %r14,%rdx
-
- movq 40(%rsi),%r12
- movq %r11,%r13
- movq %rdx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rax,%r15
- movq %r12,40(%rsp)
-
- rorq $5,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- rorq $4,%r13
- addq %rcx,%r12
- xorq %rdx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r11,%r15
- movq %r8,%rcx
-
- rorq $6,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- xorq %r9,%rcx
- xorq %rdx,%r14
- addq %r15,%r12
- movq %r8,%r15
-
- rorq $14,%r13
- andq %rdx,%rcx
- andq %r9,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rcx
-
- addq %r12,%r10
- addq %r12,%rcx
- leaq 1(%rdi),%rdi
- addq %r14,%rcx
-
- movq 48(%rsi),%r12
- movq %r10,%r13
- movq %rcx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r11,%r15
- movq %r12,48(%rsp)
-
- rorq $5,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- rorq $4,%r13
- addq %rbx,%r12
- xorq %rcx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r10,%r15
- movq %rdx,%rbx
-
- rorq $6,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- xorq %r8,%rbx
- xorq %rcx,%r14
- addq %r15,%r12
- movq %rdx,%r15
-
- rorq $14,%r13
- andq %rcx,%rbx
- andq %r8,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rbx
-
- addq %r12,%r9
- addq %r12,%rbx
- leaq 1(%rdi),%rdi
- addq %r14,%rbx
-
- movq 56(%rsi),%r12
- movq %r9,%r13
- movq %rbx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r10,%r15
- movq %r12,56(%rsp)
-
- rorq $5,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- rorq $4,%r13
- addq %rax,%r12
- xorq %rbx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r9,%r15
- movq %rcx,%rax
-
- rorq $6,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- xorq %rdx,%rax
- xorq %rbx,%r14
- addq %r15,%r12
- movq %rcx,%r15
-
- rorq $14,%r13
- andq %rbx,%rax
- andq %rdx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rax
-
- addq %r12,%r8
- addq %r12,%rax
- leaq 1(%rdi),%rdi
- addq %r14,%rax
-
- movq 64(%rsi),%r12
- movq %r8,%r13
- movq %rax,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r9,%r15
- movq %r12,64(%rsp)
-
- rorq $5,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- rorq $4,%r13
- addq %r11,%r12
- xorq %rax,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r8,%r15
- movq %rbx,%r11
-
- rorq $6,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- xorq %rcx,%r11
- xorq %rax,%r14
- addq %r15,%r12
- movq %rbx,%r15
-
- rorq $14,%r13
- andq %rax,%r11
- andq %rcx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r11
-
- addq %r12,%rdx
- addq %r12,%r11
- leaq 1(%rdi),%rdi
- addq %r14,%r11
-
- movq 72(%rsi),%r12
- movq %rdx,%r13
- movq %r11,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r8,%r15
- movq %r12,72(%rsp)
-
- rorq $5,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- rorq $4,%r13
- addq %r10,%r12
- xorq %r11,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rdx,%r15
- movq %rax,%r10
-
- rorq $6,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- xorq %rbx,%r10
- xorq %r11,%r14
- addq %r15,%r12
- movq %rax,%r15
-
- rorq $14,%r13
- andq %r11,%r10
- andq %rbx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r10
-
- addq %r12,%rcx
- addq %r12,%r10
- leaq 1(%rdi),%rdi
- addq %r14,%r10
-
- movq 80(%rsi),%r12
- movq %rcx,%r13
- movq %r10,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rdx,%r15
- movq %r12,80(%rsp)
-
- rorq $5,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- rorq $4,%r13
- addq %r9,%r12
- xorq %r10,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rcx,%r15
- movq %r11,%r9
-
- rorq $6,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- xorq %rax,%r9
- xorq %r10,%r14
- addq %r15,%r12
- movq %r11,%r15
-
- rorq $14,%r13
- andq %r10,%r9
- andq %rax,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r9
-
- addq %r12,%rbx
- addq %r12,%r9
- leaq 1(%rdi),%rdi
- addq %r14,%r9
-
- movq 88(%rsi),%r12
- movq %rbx,%r13
- movq %r9,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rcx,%r15
- movq %r12,88(%rsp)
-
- rorq $5,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- rorq $4,%r13
- addq %r8,%r12
- xorq %r9,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rbx,%r15
- movq %r10,%r8
-
- rorq $6,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- xorq %r11,%r8
- xorq %r9,%r14
- addq %r15,%r12
- movq %r10,%r15
-
- rorq $14,%r13
- andq %r9,%r8
- andq %r11,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r8
-
- addq %r12,%rax
- addq %r12,%r8
- leaq 1(%rdi),%rdi
- addq %r14,%r8
-
- movq 96(%rsi),%r12
- movq %rax,%r13
- movq %r8,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rbx,%r15
- movq %r12,96(%rsp)
-
- rorq $5,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- rorq $4,%r13
- addq %rdx,%r12
- xorq %r8,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rax,%r15
- movq %r9,%rdx
-
- rorq $6,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- xorq %r10,%rdx
- xorq %r8,%r14
- addq %r15,%r12
- movq %r9,%r15
-
- rorq $14,%r13
- andq %r8,%rdx
- andq %r10,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rdx
-
- addq %r12,%r11
- addq %r12,%rdx
- leaq 1(%rdi),%rdi
- addq %r14,%rdx
-
- movq 104(%rsi),%r12
- movq %r11,%r13
- movq %rdx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %rax,%r15
- movq %r12,104(%rsp)
-
- rorq $5,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- rorq $4,%r13
- addq %rcx,%r12
- xorq %rdx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r11,%r15
- movq %r8,%rcx
-
- rorq $6,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- xorq %r9,%rcx
- xorq %rdx,%r14
- addq %r15,%r12
- movq %r8,%r15
-
- rorq $14,%r13
- andq %rdx,%rcx
- andq %r9,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rcx
-
- addq %r12,%r10
- addq %r12,%rcx
- leaq 1(%rdi),%rdi
- addq %r14,%rcx
-
- movq 112(%rsi),%r12
- movq %r10,%r13
- movq %rcx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r11,%r15
- movq %r12,112(%rsp)
-
- rorq $5,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- rorq $4,%r13
- addq %rbx,%r12
- xorq %rcx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r10,%r15
- movq %rdx,%rbx
-
- rorq $6,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- xorq %r8,%rbx
- xorq %rcx,%r14
- addq %r15,%r12
- movq %rdx,%r15
-
- rorq $14,%r13
- andq %rcx,%rbx
- andq %r8,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rbx
-
- addq %r12,%r9
- addq %r12,%rbx
- leaq 1(%rdi),%rdi
- addq %r14,%rbx
-
- movq 120(%rsi),%r12
- movq %r9,%r13
- movq %rbx,%r14
- bswapq %r12
- rorq $23,%r13
- movq %r10,%r15
- movq %r12,120(%rsp)
-
- rorq $5,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- rorq $4,%r13
- addq %rax,%r12
- xorq %rbx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r9,%r15
- movq %rcx,%rax
-
- rorq $6,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- xorq %rdx,%rax
- xorq %rbx,%r14
- addq %r15,%r12
- movq %rcx,%r15
-
- rorq $14,%r13
- andq %rbx,%rax
- andq %rdx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rax
-
- addq %r12,%r8
- addq %r12,%rax
- leaq 1(%rdi),%rdi
- addq %r14,%rax
-
- jmp .Lrounds_16_xx
-.align 16
-.Lrounds_16_xx:
- movq 8(%rsp),%r13
- movq 112(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 72(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 0(%rsp),%r12
- movq %r8,%r13
- addq %r14,%r12
- movq %rax,%r14
- rorq $23,%r13
- movq %r9,%r15
- movq %r12,0(%rsp)
-
- rorq $5,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- rorq $4,%r13
- addq %r11,%r12
- xorq %rax,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r8,%r15
- movq %rbx,%r11
-
- rorq $6,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- xorq %rcx,%r11
- xorq %rax,%r14
- addq %r15,%r12
- movq %rbx,%r15
-
- rorq $14,%r13
- andq %rax,%r11
- andq %rcx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r11
-
- addq %r12,%rdx
- addq %r12,%r11
- leaq 1(%rdi),%rdi
- addq %r14,%r11
-
- movq 16(%rsp),%r13
- movq 120(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 80(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 8(%rsp),%r12
- movq %rdx,%r13
- addq %r14,%r12
- movq %r11,%r14
- rorq $23,%r13
- movq %r8,%r15
- movq %r12,8(%rsp)
-
- rorq $5,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- rorq $4,%r13
- addq %r10,%r12
- xorq %r11,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rdx,%r15
- movq %rax,%r10
-
- rorq $6,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- xorq %rbx,%r10
- xorq %r11,%r14
- addq %r15,%r12
- movq %rax,%r15
-
- rorq $14,%r13
- andq %r11,%r10
- andq %rbx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r10
-
- addq %r12,%rcx
- addq %r12,%r10
- leaq 1(%rdi),%rdi
- addq %r14,%r10
-
- movq 24(%rsp),%r13
- movq 0(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 88(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 16(%rsp),%r12
- movq %rcx,%r13
- addq %r14,%r12
- movq %r10,%r14
- rorq $23,%r13
- movq %rdx,%r15
- movq %r12,16(%rsp)
-
- rorq $5,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- rorq $4,%r13
- addq %r9,%r12
- xorq %r10,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rcx,%r15
- movq %r11,%r9
-
- rorq $6,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- xorq %rax,%r9
- xorq %r10,%r14
- addq %r15,%r12
- movq %r11,%r15
-
- rorq $14,%r13
- andq %r10,%r9
- andq %rax,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r9
-
- addq %r12,%rbx
- addq %r12,%r9
- leaq 1(%rdi),%rdi
- addq %r14,%r9
-
- movq 32(%rsp),%r13
- movq 8(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 96(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 24(%rsp),%r12
- movq %rbx,%r13
- addq %r14,%r12
- movq %r9,%r14
- rorq $23,%r13
- movq %rcx,%r15
- movq %r12,24(%rsp)
-
- rorq $5,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- rorq $4,%r13
- addq %r8,%r12
- xorq %r9,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rbx,%r15
- movq %r10,%r8
-
- rorq $6,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- xorq %r11,%r8
- xorq %r9,%r14
- addq %r15,%r12
- movq %r10,%r15
-
- rorq $14,%r13
- andq %r9,%r8
- andq %r11,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r8
-
- addq %r12,%rax
- addq %r12,%r8
- leaq 1(%rdi),%rdi
- addq %r14,%r8
-
- movq 40(%rsp),%r13
- movq 16(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 104(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 32(%rsp),%r12
- movq %rax,%r13
- addq %r14,%r12
- movq %r8,%r14
- rorq $23,%r13
- movq %rbx,%r15
- movq %r12,32(%rsp)
-
- rorq $5,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- rorq $4,%r13
- addq %rdx,%r12
- xorq %r8,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rax,%r15
- movq %r9,%rdx
-
- rorq $6,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- xorq %r10,%rdx
- xorq %r8,%r14
- addq %r15,%r12
- movq %r9,%r15
-
- rorq $14,%r13
- andq %r8,%rdx
- andq %r10,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rdx
-
- addq %r12,%r11
- addq %r12,%rdx
- leaq 1(%rdi),%rdi
- addq %r14,%rdx
-
- movq 48(%rsp),%r13
- movq 24(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 112(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 40(%rsp),%r12
- movq %r11,%r13
- addq %r14,%r12
- movq %rdx,%r14
- rorq $23,%r13
- movq %rax,%r15
- movq %r12,40(%rsp)
-
- rorq $5,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- rorq $4,%r13
- addq %rcx,%r12
- xorq %rdx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r11,%r15
- movq %r8,%rcx
-
- rorq $6,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- xorq %r9,%rcx
- xorq %rdx,%r14
- addq %r15,%r12
- movq %r8,%r15
-
- rorq $14,%r13
- andq %rdx,%rcx
- andq %r9,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rcx
-
- addq %r12,%r10
- addq %r12,%rcx
- leaq 1(%rdi),%rdi
- addq %r14,%rcx
-
- movq 56(%rsp),%r13
- movq 32(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 120(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 48(%rsp),%r12
- movq %r10,%r13
- addq %r14,%r12
- movq %rcx,%r14
- rorq $23,%r13
- movq %r11,%r15
- movq %r12,48(%rsp)
-
- rorq $5,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- rorq $4,%r13
- addq %rbx,%r12
- xorq %rcx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r10,%r15
- movq %rdx,%rbx
-
- rorq $6,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- xorq %r8,%rbx
- xorq %rcx,%r14
- addq %r15,%r12
- movq %rdx,%r15
-
- rorq $14,%r13
- andq %rcx,%rbx
- andq %r8,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rbx
-
- addq %r12,%r9
- addq %r12,%rbx
- leaq 1(%rdi),%rdi
- addq %r14,%rbx
-
- movq 64(%rsp),%r13
- movq 40(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 0(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 56(%rsp),%r12
- movq %r9,%r13
- addq %r14,%r12
- movq %rbx,%r14
- rorq $23,%r13
- movq %r10,%r15
- movq %r12,56(%rsp)
-
- rorq $5,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- rorq $4,%r13
- addq %rax,%r12
- xorq %rbx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r9,%r15
- movq %rcx,%rax
-
- rorq $6,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- xorq %rdx,%rax
- xorq %rbx,%r14
- addq %r15,%r12
- movq %rcx,%r15
-
- rorq $14,%r13
- andq %rbx,%rax
- andq %rdx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rax
-
- addq %r12,%r8
- addq %r12,%rax
- leaq 1(%rdi),%rdi
- addq %r14,%rax
-
- movq 72(%rsp),%r13
- movq 48(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 8(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 64(%rsp),%r12
- movq %r8,%r13
- addq %r14,%r12
- movq %rax,%r14
- rorq $23,%r13
- movq %r9,%r15
- movq %r12,64(%rsp)
-
- rorq $5,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- rorq $4,%r13
- addq %r11,%r12
- xorq %rax,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r8,%r15
- movq %rbx,%r11
-
- rorq $6,%r14
- xorq %r8,%r13
- xorq %r10,%r15
-
- xorq %rcx,%r11
- xorq %rax,%r14
- addq %r15,%r12
- movq %rbx,%r15
-
- rorq $14,%r13
- andq %rax,%r11
- andq %rcx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r11
-
- addq %r12,%rdx
- addq %r12,%r11
- leaq 1(%rdi),%rdi
- addq %r14,%r11
-
- movq 80(%rsp),%r13
- movq 56(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 16(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 72(%rsp),%r12
- movq %rdx,%r13
- addq %r14,%r12
- movq %r11,%r14
- rorq $23,%r13
- movq %r8,%r15
- movq %r12,72(%rsp)
-
- rorq $5,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- rorq $4,%r13
- addq %r10,%r12
- xorq %r11,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rdx,%r15
- movq %rax,%r10
-
- rorq $6,%r14
- xorq %rdx,%r13
- xorq %r9,%r15
-
- xorq %rbx,%r10
- xorq %r11,%r14
- addq %r15,%r12
- movq %rax,%r15
-
- rorq $14,%r13
- andq %r11,%r10
- andq %rbx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r10
-
- addq %r12,%rcx
- addq %r12,%r10
- leaq 1(%rdi),%rdi
- addq %r14,%r10
-
- movq 88(%rsp),%r13
- movq 64(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 24(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 80(%rsp),%r12
- movq %rcx,%r13
- addq %r14,%r12
- movq %r10,%r14
- rorq $23,%r13
- movq %rdx,%r15
- movq %r12,80(%rsp)
-
- rorq $5,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- rorq $4,%r13
- addq %r9,%r12
- xorq %r10,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rcx,%r15
- movq %r11,%r9
-
- rorq $6,%r14
- xorq %rcx,%r13
- xorq %r8,%r15
-
- xorq %rax,%r9
- xorq %r10,%r14
- addq %r15,%r12
- movq %r11,%r15
-
- rorq $14,%r13
- andq %r10,%r9
- andq %rax,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r9
-
- addq %r12,%rbx
- addq %r12,%r9
- leaq 1(%rdi),%rdi
- addq %r14,%r9
-
- movq 96(%rsp),%r13
- movq 72(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 32(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 88(%rsp),%r12
- movq %rbx,%r13
- addq %r14,%r12
- movq %r9,%r14
- rorq $23,%r13
- movq %rcx,%r15
- movq %r12,88(%rsp)
-
- rorq $5,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- rorq $4,%r13
- addq %r8,%r12
- xorq %r9,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rbx,%r15
- movq %r10,%r8
-
- rorq $6,%r14
- xorq %rbx,%r13
- xorq %rdx,%r15
-
- xorq %r11,%r8
- xorq %r9,%r14
- addq %r15,%r12
- movq %r10,%r15
-
- rorq $14,%r13
- andq %r9,%r8
- andq %r11,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%r8
-
- addq %r12,%rax
- addq %r12,%r8
- leaq 1(%rdi),%rdi
- addq %r14,%r8
-
- movq 104(%rsp),%r13
- movq 80(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 40(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 96(%rsp),%r12
- movq %rax,%r13
- addq %r14,%r12
- movq %r8,%r14
- rorq $23,%r13
- movq %rbx,%r15
- movq %r12,96(%rsp)
-
- rorq $5,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- rorq $4,%r13
- addq %rdx,%r12
- xorq %r8,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %rax,%r15
- movq %r9,%rdx
-
- rorq $6,%r14
- xorq %rax,%r13
- xorq %rcx,%r15
-
- xorq %r10,%rdx
- xorq %r8,%r14
- addq %r15,%r12
- movq %r9,%r15
-
- rorq $14,%r13
- andq %r8,%rdx
- andq %r10,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rdx
-
- addq %r12,%r11
- addq %r12,%rdx
- leaq 1(%rdi),%rdi
- addq %r14,%rdx
-
- movq 112(%rsp),%r13
- movq 88(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 48(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 104(%rsp),%r12
- movq %r11,%r13
- addq %r14,%r12
- movq %rdx,%r14
- rorq $23,%r13
- movq %rax,%r15
- movq %r12,104(%rsp)
-
- rorq $5,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- rorq $4,%r13
- addq %rcx,%r12
- xorq %rdx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r11,%r15
- movq %r8,%rcx
-
- rorq $6,%r14
- xorq %r11,%r13
- xorq %rbx,%r15
-
- xorq %r9,%rcx
- xorq %rdx,%r14
- addq %r15,%r12
- movq %r8,%r15
-
- rorq $14,%r13
- andq %rdx,%rcx
- andq %r9,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rcx
-
- addq %r12,%r10
- addq %r12,%rcx
- leaq 1(%rdi),%rdi
- addq %r14,%rcx
-
- movq 120(%rsp),%r13
- movq 96(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 56(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 112(%rsp),%r12
- movq %r10,%r13
- addq %r14,%r12
- movq %rcx,%r14
- rorq $23,%r13
- movq %r11,%r15
- movq %r12,112(%rsp)
-
- rorq $5,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- rorq $4,%r13
- addq %rbx,%r12
- xorq %rcx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r10,%r15
- movq %rdx,%rbx
-
- rorq $6,%r14
- xorq %r10,%r13
- xorq %rax,%r15
-
- xorq %r8,%rbx
- xorq %rcx,%r14
- addq %r15,%r12
- movq %rdx,%r15
-
- rorq $14,%r13
- andq %rcx,%rbx
- andq %r8,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rbx
-
- addq %r12,%r9
- addq %r12,%rbx
- leaq 1(%rdi),%rdi
- addq %r14,%rbx
-
- movq 0(%rsp),%r13
- movq 104(%rsp),%r14
- movq %r13,%r12
- movq %r14,%r15
-
- rorq $7,%r12
- xorq %r13,%r12
- shrq $7,%r13
-
- rorq $1,%r12
- xorq %r12,%r13
- movq 64(%rsp),%r12
-
- rorq $42,%r15
- xorq %r14,%r15
- shrq $6,%r14
-
- rorq $19,%r15
- addq %r13,%r12
- xorq %r15,%r14
-
- addq 120(%rsp),%r12
- movq %r9,%r13
- addq %r14,%r12
- movq %rbx,%r14
- rorq $23,%r13
- movq %r10,%r15
- movq %r12,120(%rsp)
-
- rorq $5,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- rorq $4,%r13
- addq %rax,%r12
- xorq %rbx,%r14
-
- addq (%rbp,%rdi,8),%r12
- andq %r9,%r15
- movq %rcx,%rax
-
- rorq $6,%r14
- xorq %r9,%r13
- xorq %r11,%r15
-
- xorq %rdx,%rax
- xorq %rbx,%r14
- addq %r15,%r12
- movq %rcx,%r15
-
- rorq $14,%r13
- andq %rbx,%rax
- andq %rdx,%r15
-
- rorq $28,%r14
- addq %r13,%r12
- addq %r15,%rax
-
- addq %r12,%r8
- addq %r12,%rax
- leaq 1(%rdi),%rdi
- addq %r14,%rax
-
- cmpq $80,%rdi
- jb .Lrounds_16_xx
-
- movq 128+0(%rsp),%rdi
- leaq 128(%rsi),%rsi
-
- addq 0(%rdi),%rax
- addq 8(%rdi),%rbx
- addq 16(%rdi),%rcx
- addq 24(%rdi),%rdx
- addq 32(%rdi),%r8
- addq 40(%rdi),%r9
- addq 48(%rdi),%r10
- addq 56(%rdi),%r11
-
- cmpq 128+16(%rsp),%rsi
-
- movq %rax,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rcx,16(%rdi)
- movq %rdx,24(%rdi)
- movq %r8,32(%rdi)
- movq %r9,40(%rdi)
- movq %r10,48(%rdi)
- movq %r11,56(%rdi)
- jb .Lloop
-
- movq 128+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
-.Lepilogue:
- .byte 0xf3,0xc3
-.size sha512_block_data_order,.-sha512_block_data_order
-.align 64
-.type K512,@object
-K512:
-.quad 0x428a2f98d728ae22,0x7137449123ef65cd
-.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
-.quad 0x3956c25bf348b538,0x59f111f1b605d019
-.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
-.quad 0xd807aa98a3030242,0x12835b0145706fbe
-.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
-.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
-.quad 0x9bdc06a725c71235,0xc19bf174cf692694
-.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
-.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
-.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
-.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
-.quad 0x983e5152ee66dfab,0xa831c66d2db43210
-.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
-.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
-.quad 0x06ca6351e003826f,0x142929670a0e6e70
-.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
-.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
-.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
-.quad 0x81c2c92e47edaee6,0x92722c851482353b
-.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
-.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
-.quad 0xd192e819d6ef5218,0xd69906245565a910
-.quad 0xf40e35855771202a,0x106aa07032bbd1b8
-.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
-.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
-.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
-.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
-.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
-.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
-.quad 0x90befffa23631e28,0xa4506cebde82bde9
-.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
-.quad 0xca273eceea26619c,0xd186b8c721c0c207
-.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
-.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
-.quad 0x113f9804bef90dae,0x1b710b35131c471b
-.quad 0x28db77f523047d84,0x32caab7b40c72493
-.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
-.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
-.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/app/openssl/crypto/sha/asm/sha512-x86_64.pl b/app/openssl/crypto/sha/asm/sha512-x86_64.pl
deleted file mode 100755
index 8d516785..00000000
--- a/app/openssl/crypto/sha/asm/sha512-x86_64.pl
+++ /dev/null
@@ -1,451 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
-# ====================================================================
-#
-# sha256/512_block procedure for x86_64.
-#
-# 40% improvement over compiler-generated code on Opteron. On EM64T
-# sha256 was observed to run >80% faster and sha512 >40%. No magical
-# tricks, just straight implementation... I really wonder why gcc
-# [being armed with inline assembler] fails to generate code as fast.
-# The only thing which is cool about this module is that the very
-# same instruction sequence is used for both SHA-256 and SHA-512. In
-# the former case the instructions operate on 32-bit operands, while
-# in the latter on 64-bit ones. All I had to do was get one flavor
-# right; the other one passed the test right away:-)
-#
-# sha256_block runs in ~1005 cycles on Opteron, which gives you
-# asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
-# frequency in GHz. sha512_block runs in ~1275 cycles, which results
-# in 128*1000/1275=100MBps per GHz. Is there room for improvement?
-# Well, if you compare it to the IA-64 implementation, which maintains
-# X[16] in the register bank[!], tends to 4 instructions per CPU clock
-# cycle and runs in 1003 cycles, 1275 is a very good result for the
-# 3-way issue Opteron pipeline with X[16] maintained in memory. So *if*
-# there is a way to improve it, *then* the only way would be to try to
-# offload X[16] updates to the SSE unit, but that would require "deeper"
-# loop unrolling, which in turn would naturally cause size blow-up, not
-# to mention increased complexity! And once again, only *if* it's
-# actually possible to noticeably improve overall ILP, instruction-
-# level parallelism, on the given CPU implementation in this case.
-#
-# Special note on Intel EM64T. While the Opteron CPU exhibits a perfect
-# performance ratio of 1.5 between 64- and 32-bit flavors [see above],
-# [currently available] EM64T CPUs apparently are far from it. On the
-# contrary, the 64-bit version, sha512_block, is ~30% *slower* than the
-# 32-bit sha256_block:-( This is presumably because 64-bit shifts/rotates
-# apparently are not atomic instructions, but implemented in microcode.
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" $xlate $flavour $output";
-*STDOUT=*OUT;
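-
-# (The flavour/output handling above follows the usual perlasm
-# pattern; as an illustration, a typical invocation would be something
-# like
-#	perl sha512-x86_64.pl elf sha512-x86_64.S
-# where the x86_64-xlate.pl filter translates the generic output into
-# the target assembler dialect.)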
-
-if ($output =~ /512/) {
- $func="sha512_block_data_order";
- $TABLE="K512";
- $SZ=8;
- @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx",
- "%r8", "%r9", "%r10","%r11");
- ($T1,$a0,$a1,$a2)=("%r12","%r13","%r14","%r15");
- @Sigma0=(28,34,39);
- @Sigma1=(14,18,41);
- @sigma0=(1, 8, 7);
- @sigma1=(19,61, 6);
- $rounds=80;
-} else {
- $func="sha256_block_data_order";
- $TABLE="K256";
- $SZ=4;
- @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx",
- "%r8d","%r9d","%r10d","%r11d");
- ($T1,$a0,$a1,$a2)=("%r12d","%r13d","%r14d","%r15d");
- @Sigma0=( 2,13,22);
- @Sigma1=( 6,11,25);
- @sigma0=( 7,18, 3);
- @sigma1=(17,19,10);
- $rounds=64;
-}
-
-$ctx="%rdi"; # 1st arg
-$round="%rdi"; # zaps $ctx
-$inp="%rsi"; # 2nd arg
-$Tbl="%rbp";
-
-$_ctx="16*$SZ+0*8(%rsp)";
-$_inp="16*$SZ+1*8(%rsp)";
-$_end="16*$SZ+2*8(%rsp)";
-$_rsp="16*$SZ+3*8(%rsp)";
-$framesz="16*$SZ+4*8";
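-
-# Stack frame sketch (offsets from the aligned %rsp, derived from the
-# definitions above):
-#
-#	0 .. 16*$SZ-1	circular X[0..15] message-schedule buffer
-#	16*$SZ+0*8	saved ctx (1st arg)
-#	16*$SZ+1*8	saved inp (2nd arg)
-#	16*$SZ+2*8	end-of-input pointer
-#	16*$SZ+3*8	caller's %rsp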
-
-
-sub ROUND_00_15()
-{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___;
- ror \$`$Sigma1[2]-$Sigma1[1]`,$a0
- mov $f,$a2
- mov $T1,`$SZ*($i&0xf)`(%rsp)
-
- ror \$`$Sigma0[2]-$Sigma0[1]`,$a1
- xor $e,$a0
- xor $g,$a2 # f^g
-
- ror \$`$Sigma1[1]-$Sigma1[0]`,$a0
- add $h,$T1 # T1+=h
- xor $a,$a1
-
- add ($Tbl,$round,$SZ),$T1 # T1+=K[round]
- and $e,$a2 # (f^g)&e
- mov $b,$h
-
- ror \$`$Sigma0[1]-$Sigma0[0]`,$a1
- xor $e,$a0
- xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g
-
- xor $c,$h # b^c
- xor $a,$a1
- add $a2,$T1 # T1+=Ch(e,f,g)
- mov $b,$a2
-
- ror \$$Sigma1[0],$a0 # Sigma1(e)
- and $a,$h # h=(b^c)&a
- and $c,$a2 # b&c
-
- ror \$$Sigma0[0],$a1 # Sigma0(a)
- add $a0,$T1 # T1+=Sigma1(e)
- add $a2,$h # h+=b&c (completes +=Maj(a,b,c))
-
- add $T1,$d # d+=T1
- add $T1,$h # h+=T1
- lea 1($round),$round # round++
- add $a1,$h # h+=Sigma0(a)
-
-___
-}
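-
-# In plain terms, one ROUND_00_15 invocation computes (illustrative
-# pseudo-Perl only, with Ch/Maj as modelled above and Sigma0/Sigma1
-# the rotate combinations given by @Sigma0/@Sigma1; $T1 enters the
-# round holding W[i]):
-#
-#	$T1 += $h + $K[$round] + Ch($e,$f,$g) + Sigma1($e);
-#	$d  += $T1;
-#	$h   = $T1 + Sigma0($a) + Maj($a,$b,$c);
-#
-# i.e. the standard T1/T2 recurrence with T2 folded into $h.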
-
-sub ROUND_16_XX()
-{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
-
-$code.=<<___;
- mov `$SZ*(($i+1)&0xf)`(%rsp),$a0
- mov `$SZ*(($i+14)&0xf)`(%rsp),$a1
- mov $a0,$T1
- mov $a1,$a2
-
- ror \$`$sigma0[1]-$sigma0[0]`,$T1
- xor $a0,$T1
- shr \$$sigma0[2],$a0
-
- ror \$$sigma0[0],$T1
- xor $T1,$a0 # sigma0(X[(i+1)&0xf])
- mov `$SZ*(($i+9)&0xf)`(%rsp),$T1
-
- ror \$`$sigma1[1]-$sigma1[0]`,$a2
- xor $a1,$a2
- shr \$$sigma1[2],$a1
-
- ror \$$sigma1[0],$a2
- add $a0,$T1
- xor $a2,$a1 # sigma1(X[(i+14)&0xf])
-
- add `$SZ*($i&0xf)`(%rsp),$T1
- mov $e,$a0
- add $a1,$T1
- mov $a,$a1
-___
- &ROUND_00_15(@_);
-}
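-
-# ROUND_16_XX first extends the message schedule in the circular
-# buffer, which in absolute indices is the standard recurrence
-# (sketch; every X[] slot lives on the stack modulo 16, so X[i-16]
-# and X[i] share slot i&0xf):
-#
-#	X[i] = sigma0(X[i-15]) + sigma1(X[i-2]) + X[i-7] + X[i-16];
-#
-# and then emits the common ROUND_00_15 body with $T1 holding the
-# freshly computed X[i].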
-
-$code=<<___;
-.text
-
-.globl $func
-.type $func,\@function,4
-.align 16
-$func:
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- mov %rsp,%r11 # copy %rsp
- shl \$4,%rdx # num*16
- sub \$$framesz,%rsp
- lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
- and \$-64,%rsp # align stack frame
- mov $ctx,$_ctx # save ctx, 1st arg
- mov $inp,$_inp # save inp, 2nd arg
- mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
-.Lprologue:
-
- lea $TABLE(%rip),$Tbl
-
- mov $SZ*0($ctx),$A
- mov $SZ*1($ctx),$B
- mov $SZ*2($ctx),$C
- mov $SZ*3($ctx),$D
- mov $SZ*4($ctx),$E
- mov $SZ*5($ctx),$F
- mov $SZ*6($ctx),$G
- mov $SZ*7($ctx),$H
- jmp .Lloop
-
-.align 16
-.Lloop:
- xor $round,$round
-___
- for($i=0;$i<16;$i++) {
- $code.=" mov $SZ*$i($inp),$T1\n";
- $code.=" mov @ROT[4],$a0\n";
- $code.=" mov @ROT[0],$a1\n";
- $code.=" bswap $T1\n";
- &ROUND_00_15($i,@ROT);
- unshift(@ROT,pop(@ROT));
- }
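-# (The pop/unshift above rotates @ROT one position to the right, so
-# the register that held $h in round $i holds $a in round $i+1; this
-# replaces the a..h variable shuffle of the specification with a
-# generation-time renaming, at zero run-time cost.)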
-$code.=<<___;
- jmp .Lrounds_16_xx
-.align 16
-.Lrounds_16_xx:
-___
- for(;$i<32;$i++) {
- &ROUND_16_XX($i,@ROT);
- unshift(@ROT,pop(@ROT));
- }
-
-$code.=<<___;
- cmp \$$rounds,$round
- jb .Lrounds_16_xx
-
- mov $_ctx,$ctx
- lea 16*$SZ($inp),$inp
-
- add $SZ*0($ctx),$A
- add $SZ*1($ctx),$B
- add $SZ*2($ctx),$C
- add $SZ*3($ctx),$D
- add $SZ*4($ctx),$E
- add $SZ*5($ctx),$F
- add $SZ*6($ctx),$G
- add $SZ*7($ctx),$H
-
- cmp $_end,$inp
-
- mov $A,$SZ*0($ctx)
- mov $B,$SZ*1($ctx)
- mov $C,$SZ*2($ctx)
- mov $D,$SZ*3($ctx)
- mov $E,$SZ*4($ctx)
- mov $F,$SZ*5($ctx)
- mov $G,$SZ*6($ctx)
- mov $H,$SZ*7($ctx)
- jb .Lloop
-
- mov $_rsp,%rsi
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
-.Lepilogue:
- ret
-.size $func,.-$func
-___
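-
-# (A small scheduling note on the loop generated above: the cmp
-# against the end pointer is issued before the eight stores, and since
-# mov does not modify the flags, the jb that follows the stores still
-# consumes that comparison; the stores thus execute under the shadow
-# of the already-resolved loop test.)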
-
-if ($SZ==4) {
-$code.=<<___;
-.align 64
-.type $TABLE,\@object
-$TABLE:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-___
-} else {
-$code.=<<___;
-.align 64
-.type $TABLE,\@object
-$TABLE:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
-___
-}
-
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type se_handler,\@abi-omnipotent
-.align 16
-se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- lea .Lprologue(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lprologue
- jb .Lin_prologue
-
- mov 152($context),%rax # pull context->Rsp
-
- lea .Lepilogue(%rip),%r10
- cmp %r10,%rbx # context->Rip>=.Lepilogue
- jae .Lin_prologue
-
- mov 16*$SZ+3*8(%rax),%rax # pull $_rsp
- lea 48(%rax),%rax
-
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
- mov -24(%rax),%r12
- mov -32(%rax),%r13
- mov -40(%rax),%r14
- mov -48(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_prologue:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$154,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size se_handler,.-se_handler
-
-.section .pdata
-.align 4
- .rva .LSEH_begin_$func
- .rva .LSEH_end_$func
- .rva .LSEH_info_$func
-
-.section .xdata
-.align 8
-.LSEH_info_$func:
- .byte 9,0,0,0
- .rva se_handler
-___
-}
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
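-# The substitution above evaluates every backtick-delimited expression
-# embedded in $code; e.g. `$Sigma1[2]-$Sigma1[1]` becomes the literal
-# rotate count 23 for SHA-512 (41-18) or 14 for SHA-256 (25-11).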
-print $code;
-close STDOUT;