summaryrefslogtreecommitdiff
path: root/main/openssl/crypto/modes/asm/ghash-x86.S
diff options
context:
space:
mode:
authorArne Schwabe <arne@rfc2549.org>2014-04-23 09:56:37 +0200
committerArne Schwabe <arne@rfc2549.org>2014-04-23 09:56:37 +0200
commite436c963f0976b885a7db04681344779e26dd3b5 (patch)
tree240663106f32e02e1c34080656f4ef21a2e1776e /main/openssl/crypto/modes/asm/ghash-x86.S
parent6a99715a9b072fa249e79c98cd9f03991f0f1219 (diff)
Update OpenSSL to 1.0.1g and statically link OpenVPN with it
Diffstat (limited to 'main/openssl/crypto/modes/asm/ghash-x86.S')
-rw-r--r--main/openssl/crypto/modes/asm/ghash-x86.S728
1 files changed, 728 insertions, 0 deletions
diff --git a/main/openssl/crypto/modes/asm/ghash-x86.S b/main/openssl/crypto/modes/asm/ghash-x86.S
new file mode 100644
index 00000000..cb9ae20d
--- /dev/null
+++ b/main/openssl/crypto/modes/asm/ghash-x86.S
@@ -0,0 +1,728 @@
+.file "ghash-x86.s"
+.text
+.globl gcm_gmult_4bit_x86
+.type gcm_gmult_4bit_x86,@function
+.align 16
+gcm_gmult_4bit_x86:
+.L_gcm_gmult_4bit_x86_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ subl $84,%esp
+ movl 104(%esp),%edi
+ movl 108(%esp),%esi
+ movl (%edi),%ebp
+ movl 4(%edi),%edx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%ebx
+ movl $0,16(%esp)
+ movl $471859200,20(%esp)
+ movl $943718400,24(%esp)
+ movl $610271232,28(%esp)
+ movl $1887436800,32(%esp)
+ movl $1822425088,36(%esp)
+ movl $1220542464,40(%esp)
+ movl $1423966208,44(%esp)
+ movl $3774873600,48(%esp)
+ movl $4246732800,52(%esp)
+ movl $3644850176,56(%esp)
+ movl $3311403008,60(%esp)
+ movl $2441084928,64(%esp)
+ movl $2376073216,68(%esp)
+ movl $2847932416,72(%esp)
+ movl $3051356160,76(%esp)
+ movl %ebp,(%esp)
+ movl %edx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ shrl $20,%ebx
+ andl $240,%ebx
+ movl 4(%esi,%ebx,1),%ebp
+ movl (%esi,%ebx,1),%edx
+ movl 12(%esi,%ebx,1),%ecx
+ movl 8(%esi,%ebx,1),%ebx
+ xorl %eax,%eax
+ movl $15,%edi
+ jmp .L000x86_loop
+.align 16
+.L000x86_loop:
+ movb %bl,%al
+ shrdl $4,%ecx,%ebx
+ andb $15,%al
+ shrdl $4,%edx,%ecx
+ shrdl $4,%ebp,%edx
+ shrl $4,%ebp
+ xorl 16(%esp,%eax,4),%ebp
+ movb (%esp,%edi,1),%al
+ andb $240,%al
+ xorl 8(%esi,%eax,1),%ebx
+ xorl 12(%esi,%eax,1),%ecx
+ xorl (%esi,%eax,1),%edx
+ xorl 4(%esi,%eax,1),%ebp
+ decl %edi
+ js .L001x86_break
+ movb %bl,%al
+ shrdl $4,%ecx,%ebx
+ andb $15,%al
+ shrdl $4,%edx,%ecx
+ shrdl $4,%ebp,%edx
+ shrl $4,%ebp
+ xorl 16(%esp,%eax,4),%ebp
+ movb (%esp,%edi,1),%al
+ shlb $4,%al
+ xorl 8(%esi,%eax,1),%ebx
+ xorl 12(%esi,%eax,1),%ecx
+ xorl (%esi,%eax,1),%edx
+ xorl 4(%esi,%eax,1),%ebp
+ jmp .L000x86_loop
+.align 16
+.L001x86_break:
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ bswap %ebp
+ movl 104(%esp),%edi
+ movl %ebx,12(%edi)
+ movl %ecx,8(%edi)
+ movl %edx,4(%edi)
+ movl %ebp,(%edi)
+ addl $84,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
+.globl gcm_ghash_4bit_x86
+.type gcm_ghash_4bit_x86,@function
+.align 16
+gcm_ghash_4bit_x86:
+.L_gcm_ghash_4bit_x86_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ subl $84,%esp
+ movl 104(%esp),%ebx
+ movl 108(%esp),%esi
+ movl 112(%esp),%edi
+ movl 116(%esp),%ecx
+ addl %edi,%ecx
+ movl %ecx,116(%esp)
+ movl (%ebx),%ebp
+ movl 4(%ebx),%edx
+ movl 8(%ebx),%ecx
+ movl 12(%ebx),%ebx
+ movl $0,16(%esp)
+ movl $471859200,20(%esp)
+ movl $943718400,24(%esp)
+ movl $610271232,28(%esp)
+ movl $1887436800,32(%esp)
+ movl $1822425088,36(%esp)
+ movl $1220542464,40(%esp)
+ movl $1423966208,44(%esp)
+ movl $3774873600,48(%esp)
+ movl $4246732800,52(%esp)
+ movl $3644850176,56(%esp)
+ movl $3311403008,60(%esp)
+ movl $2441084928,64(%esp)
+ movl $2376073216,68(%esp)
+ movl $2847932416,72(%esp)
+ movl $3051356160,76(%esp)
+.align 16
+.L002x86_outer_loop:
+ xorl 12(%edi),%ebx
+ xorl 8(%edi),%ecx
+ xorl 4(%edi),%edx
+ xorl (%edi),%ebp
+ movl %ebx,12(%esp)
+ movl %ecx,8(%esp)
+ movl %edx,4(%esp)
+ movl %ebp,(%esp)
+ shrl $20,%ebx
+ andl $240,%ebx
+ movl 4(%esi,%ebx,1),%ebp
+ movl (%esi,%ebx,1),%edx
+ movl 12(%esi,%ebx,1),%ecx
+ movl 8(%esi,%ebx,1),%ebx
+ xorl %eax,%eax
+ movl $15,%edi
+ jmp .L003x86_loop
+.align 16
+.L003x86_loop:
+ movb %bl,%al
+ shrdl $4,%ecx,%ebx
+ andb $15,%al
+ shrdl $4,%edx,%ecx
+ shrdl $4,%ebp,%edx
+ shrl $4,%ebp
+ xorl 16(%esp,%eax,4),%ebp
+ movb (%esp,%edi,1),%al
+ andb $240,%al
+ xorl 8(%esi,%eax,1),%ebx
+ xorl 12(%esi,%eax,1),%ecx
+ xorl (%esi,%eax,1),%edx
+ xorl 4(%esi,%eax,1),%ebp
+ decl %edi
+ js .L004x86_break
+ movb %bl,%al
+ shrdl $4,%ecx,%ebx
+ andb $15,%al
+ shrdl $4,%edx,%ecx
+ shrdl $4,%ebp,%edx
+ shrl $4,%ebp
+ xorl 16(%esp,%eax,4),%ebp
+ movb (%esp,%edi,1),%al
+ shlb $4,%al
+ xorl 8(%esi,%eax,1),%ebx
+ xorl 12(%esi,%eax,1),%ecx
+ xorl (%esi,%eax,1),%edx
+ xorl 4(%esi,%eax,1),%ebp
+ jmp .L003x86_loop
+.align 16
+.L004x86_break:
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ bswap %ebp
+ movl 112(%esp),%edi
+ leal 16(%edi),%edi
+ cmpl 116(%esp),%edi
+ movl %edi,112(%esp)
+ jb .L002x86_outer_loop
+ movl 104(%esp),%edi
+ movl %ebx,12(%edi)
+ movl %ecx,8(%edi)
+ movl %edx,4(%edi)
+ movl %ebp,(%edi)
+ addl $84,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
+.type _mmx_gmult_4bit_inner,@function
+.align 16
+_mmx_gmult_4bit_inner:
+ xorl %ecx,%ecx
+ movl %ebx,%edx
+ movb %dl,%cl
+ shlb $4,%cl
+ andl $240,%edx
+ movq 8(%esi,%ecx,1),%mm0
+ movq (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 14(%edi),%cl
+ psllq $60,%mm2
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 13(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 12(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 11(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 10(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 9(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 8(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 7(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 6(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 5(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 4(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 3(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 2(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb 1(%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb (%edi),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ shlb $4,%cl
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ andl $240,%edx
+ pxor (%eax,%ebp,8),%mm1
+ andl $15,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ movd %mm0,%ebp
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ andl $15,%ebp
+ pxor (%esi,%edx,1),%mm1
+ movd %mm0,%ebx
+ pxor %mm2,%mm0
+ movl 4(%eax,%ebp,8),%edi
+ psrlq $32,%mm0
+ movd %mm1,%edx
+ psrlq $32,%mm1
+ movd %mm0,%ecx
+ movd %mm1,%ebp
+ shll $4,%edi
+ bswap %ebx
+ bswap %edx
+ bswap %ecx
+ xorl %edi,%ebp
+ bswap %ebp
+ ret
+.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
+.globl gcm_gmult_4bit_mmx
+.type gcm_gmult_4bit_mmx,@function
+.align 16
+gcm_gmult_4bit_mmx:
+.L_gcm_gmult_4bit_mmx_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ call .L005pic_point
+.L005pic_point:
+ popl %eax
+ leal .Lrem_4bit-.L005pic_point(%eax),%eax
+ movzbl 15(%edi),%ebx
+ call _mmx_gmult_4bit_inner
+ movl 20(%esp),%edi
+ emms
+ movl %ebx,12(%edi)
+ movl %edx,4(%edi)
+ movl %ecx,8(%edi)
+ movl %ebp,(%edi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
+.globl gcm_ghash_4bit_mmx
+.type gcm_ghash_4bit_mmx,@function
+.align 16
+gcm_ghash_4bit_mmx:
+.L_gcm_ghash_4bit_mmx_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%ebp
+ movl 24(%esp),%esi
+ movl 28(%esp),%edi
+ movl 32(%esp),%ecx
+ call .L006pic_point
+.L006pic_point:
+ popl %eax
+ leal .Lrem_4bit-.L006pic_point(%eax),%eax
+ addl %edi,%ecx
+ movl %ecx,32(%esp)
+ subl $20,%esp
+ movl 12(%ebp),%ebx
+ movl 4(%ebp),%edx
+ movl 8(%ebp),%ecx
+ movl (%ebp),%ebp
+ jmp .L007mmx_outer_loop
+.align 16
+.L007mmx_outer_loop:
+ xorl 12(%edi),%ebx
+ xorl 4(%edi),%edx
+ xorl 8(%edi),%ecx
+ xorl (%edi),%ebp
+ movl %edi,48(%esp)
+ movl %ebx,12(%esp)
+ movl %edx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %ebp,(%esp)
+ movl %esp,%edi
+ shrl $24,%ebx
+ call _mmx_gmult_4bit_inner
+ movl 48(%esp),%edi
+ leal 16(%edi),%edi
+ cmpl 52(%esp),%edi
+ jb .L007mmx_outer_loop
+ movl 40(%esp),%edi
+ emms
+ movl %ebx,12(%edi)
+ movl %edx,4(%edi)
+ movl %ecx,8(%edi)
+ movl %ebp,(%edi)
+ addl $20,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.align 64
+.Lrem_4bit:
+.long 0,0,0,29491200,0,58982400,0,38141952
+.long 0,117964800,0,113901568,0,76283904,0,88997888
+.long 0,235929600,0,265420800,0,227803136,0,206962688
+.long 0,152567808,0,148504576,0,177995776,0,190709760
+.align 64
+.L008rem_8bit:
+.value 0,450,900,582,1800,1738,1164,1358
+.value 3600,4050,3476,3158,2328,2266,2716,2910
+.value 7200,7650,8100,7782,6952,6890,6316,6510
+.value 4656,5106,4532,4214,5432,5370,5820,6014
+.value 14400,14722,15300,14854,16200,16010,15564,15630
+.value 13904,14226,13780,13334,12632,12442,13020,13086
+.value 9312,9634,10212,9766,9064,8874,8428,8494
+.value 10864,11186,10740,10294,11640,11450,12028,12094
+.value 28800,28994,29444,29382,30600,30282,29708,30158
+.value 32400,32594,32020,31958,31128,30810,31260,31710
+.value 27808,28002,28452,28390,27560,27242,26668,27118
+.value 25264,25458,24884,24822,26040,25722,26172,26622
+.value 18624,18690,19268,19078,20424,19978,19532,19854
+.value 18128,18194,17748,17558,16856,16410,16988,17310
+.value 21728,21794,22372,22182,21480,21034,20588,20910
+.value 23280,23346,22900,22710,24056,23610,24188,24510
+.value 57600,57538,57988,58182,58888,59338,58764,58446
+.value 61200,61138,60564,60758,59416,59866,60316,59998
+.value 64800,64738,65188,65382,64040,64490,63916,63598
+.value 62256,62194,61620,61814,62520,62970,63420,63102
+.value 55616,55426,56004,56070,56904,57226,56780,56334
+.value 55120,54930,54484,54550,53336,53658,54236,53790
+.value 50528,50338,50916,50982,49768,50090,49644,49198
+.value 52080,51890,51444,51510,52344,52666,53244,52798
+.value 37248,36930,37380,37830,38536,38730,38156,38094
+.value 40848,40530,39956,40406,39064,39258,39708,39646
+.value 36256,35938,36388,36838,35496,35690,35116,35054
+.value 33712,33394,32820,33270,33976,34170,34620,34558
+.value 43456,43010,43588,43910,44744,44810,44364,44174
+.value 42960,42514,42068,42390,41176,41242,41820,41630
+.value 46560,46114,46692,47014,45800,45866,45420,45230
+.value 48112,47666,47220,47542,48376,48442,49020,48830
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
+.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+.byte 0