summary | refs | log | tree | commit | diff
path: root/app/openssl/crypto/aes/asm/aes-armv4.s
diff options
context:
space:
mode:
author: Parménides GV <parmegv@sdf.org>, 2014-09-26 09:46:26 +0200
committer: Parménides GV <parmegv@sdf.org>, 2014-09-26 09:46:26 +0200
commit: 394451dbae3e71282611058e00b5fd16c865f147 (patch)
tree: 17b71034d9350a2848603f5edf0a8b13025909be /app/openssl/crypto/aes/asm/aes-armv4.s
parent: 644fd02cf8da95b0b5a99fb9f2142628dd27f7c2 (diff)
Revert "Updated native subprojects from ics-openvpn."
This reverts commit d0e7ba3029b2fd42582413aa95773fe7dbdede90. I'll postpone this work until the next cycle; it's not trivial, because the updated code doesn't link properly.
Diffstat (limited to 'app/openssl/crypto/aes/asm/aes-armv4.s')
-rw-r--r-- app/openssl/crypto/aes/asm/aes-armv4.s | 160
1 file changed, 27 insertions, 133 deletions
diff --git a/app/openssl/crypto/aes/asm/aes-armv4.s b/app/openssl/crypto/aes/asm/aes-armv4.s
index 333a5227..2697d4ce 100644
--- a/app/openssl/crypto/aes/asm/aes-armv4.s
+++ b/app/openssl/crypto/aes/asm/aes-armv4.s
@@ -1,53 +1,6 @@
-
-@ ====================================================================
-@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ AES for ARMv4
-
-@ January 2007.
-@
-@ Code uses single 1K S-box and is >2 times faster than code generated
-@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
-@ allows to merge logical or arithmetic operation with shift or rotate
-@ in one instruction and emit combined result every cycle. The module
-@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
-@ key [on single-issue Xscale PXA250 core].
-
-@ May 2007.
-@
-@ AES_set_[en|de]crypt_key is added.
-
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
-@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
-
-@ February 2011.
-@
-@ Profiler-assisted and platform-specific optimization resulted in 16%
-@ improvement on Cortex A8 core and ~21.5 cycles per byte.
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-#endif
-
+#include "arm_arch.h"
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
.code 32
-# endif
-#endif
.type AES_Te,%object
.align 5
@@ -161,11 +114,7 @@ AES_Te:
.type AES_encrypt,%function
.align 5
AES_encrypt:
-#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_encrypt
-#else
- adr r3,AES_encrypt
-#endif
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
@@ -407,21 +356,11 @@ _armv4_AES_encrypt:
.align 5
private_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_set_encrypt_key
-#else
- adr r3,private_AES_set_encrypt_key
-#endif
teq r0,#0
-#if __ARM_ARCH__>=7
- itt eq @ Thumb2 thing, sanity check in ARM
-#endif
moveq r0,#-1
beq .Labrt
teq r2,#0
-#if __ARM_ARCH__>=7
- itt eq @ Thumb2 thing, sanity check in ARM
-#endif
moveq r0,#-1
beq .Labrt
@@ -430,9 +369,6 @@ _armv4_AES_set_encrypt_key:
teq r1,#192
beq .Lok
teq r1,#256
-#if __ARM_ARCH__>=7
- itt ne @ Thumb2 thing, sanity check in ARM
-#endif
movne r0,#-1
bne .Labrt
@@ -587,9 +523,6 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-16]
subs r12,r12,#1
str r3,[r11,#-12]
-#if __ARM_ARCH__>=7
- itt eq @ Thumb2 thing, sanity check in ARM
-#endif
subeq r2,r11,#216
beq .Ldone
@@ -659,9 +592,6 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-24]
subs r12,r12,#1
str r3,[r11,#-20]
-#if __ARM_ARCH__>=7
- itt eq @ Thumb2 thing, sanity check in ARM
-#endif
subeq r2,r11,#256
beq .Ldone
@@ -691,17 +621,11 @@ _armv4_AES_set_encrypt_key:
str r9,[r11,#-4]
b .L256_loop
-.align 2
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt:
-#if __ARM_ARCH__>=5
- bx lr @ .word 0xe12fff1e
-#else
- tst lr,#1
+.Labrt: tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
.global private_AES_set_decrypt_key
@@ -711,57 +635,34 @@ private_AES_set_decrypt_key:
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
- ldr lr,[sp],#4 @ pop lr
+ ldrne lr,[sp],#4 @ pop lr
bne .Labrt
- mov r0,r2 @ AES_set_encrypt_key preserves r2,
- mov r1,r2 @ which is AES_KEY *key
- b _armv4_AES_set_enc2dec_key
-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
-
-@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
-.global AES_set_enc2dec_key
-.type AES_set_enc2dec_key,%function
-.align 5
-AES_set_enc2dec_key:
-_armv4_AES_set_enc2dec_key:
- stmdb sp!,{r4-r12,lr}
-
- ldr r12,[r0,#240]
- mov r7,r0 @ input
- add r8,r0,r12,lsl#4
- mov r11,r1 @ ouput
- add r10,r1,r12,lsl#4
- str r12,[r1,#240]
+ stmdb sp!,{r4-r12}
-.Linv: ldr r0,[r7],#16
- ldr r1,[r7,#-12]
- ldr r2,[r7,#-8]
- ldr r3,[r7,#-4]
- ldr r4,[r8],#-16
- ldr r5,[r8,#16+4]
- ldr r6,[r8,#16+8]
- ldr r9,[r8,#16+12]
- str r0,[r10],#-16
- str r1,[r10,#16+4]
- str r2,[r10,#16+8]
- str r3,[r10,#16+12]
- str r4,[r11],#16
- str r5,[r11,#-12]
- str r6,[r11,#-8]
- str r9,[r11,#-4]
- teq r7,r8
- bne .Linv
+ ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
+ mov r11,r2 @ which is AES_KEY *key
+ mov r7,r2
+ add r8,r2,r12,lsl#4
- ldr r0,[r7]
+.Linv: ldr r0,[r7]
ldr r1,[r7,#4]
ldr r2,[r7,#8]
ldr r3,[r7,#12]
- str r0,[r11]
- str r1,[r11,#4]
- str r2,[r11,#8]
- str r3,[r11,#12]
- sub r11,r11,r12,lsl#3
+ ldr r4,[r8]
+ ldr r5,[r8,#4]
+ ldr r6,[r8,#8]
+ ldr r9,[r8,#12]
+ str r0,[r8],#-16
+ str r1,[r8,#16+4]
+ str r2,[r8,#16+8]
+ str r3,[r8,#16+12]
+ str r4,[r7],#16
+ str r5,[r7,#-12]
+ str r6,[r7,#-8]
+ str r9,[r7,#-4]
+ teq r7,r8
+ bne .Linv
ldr r0,[r11,#16]! @ prefetch tp1
mov r7,#0x80
mov r8,#0x1b
@@ -814,7 +715,7 @@ _armv4_AES_set_enc2dec_key:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size AES_set_enc2dec_key,.-AES_set_enc2dec_key
+.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
.type AES_Td,%object
.align 5
@@ -924,11 +825,7 @@ AES_Td:
.type AES_decrypt,%function
.align 5
AES_decrypt:
-#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_decrypt
-#else
- adr r3,AES_decrypt
-#endif
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
@@ -1125,9 +1022,8 @@ _armv4_AES_decrypt:
ldrb r6,[r10,r9] @ Td4[s0>>0]
and r9,lr,r1,lsr#8
- add r1,r10,r1,lsr#24
ldrb r7,[r10,r7] @ Td4[s1>>0]
- ldrb r1,[r1] @ Td4[s1>>24]
+ ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
ldrb r8,[r10,r8] @ Td4[s1>>16]
eor r0,r7,r0,lsl#24
ldrb r9,[r10,r9] @ Td4[s1>>8]
@@ -1140,8 +1036,7 @@ _armv4_AES_decrypt:
ldrb r8,[r10,r8] @ Td4[s2>>0]
and r9,lr,r2,lsr#16
- add r2,r10,r2,lsr#24
- ldrb r2,[r2] @ Td4[s2>>24]
+ ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
eor r0,r0,r7,lsl#8
ldrb r9,[r10,r9] @ Td4[s2>>16]
eor r1,r8,r1,lsl#16
@@ -1153,9 +1048,8 @@ _armv4_AES_decrypt:
ldrb r8,[r10,r8] @ Td4[s3>>8]
and r9,lr,r3 @ i2
- add r3,r10,r3,lsr#24
ldrb r9,[r10,r9] @ Td4[s3>>0]
- ldrb r3,[r3] @ Td4[s3>>24]
+ ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
eor r0,r0,r7,lsl#16
ldr r7,[r11,#0]
eor r1,r1,r8,lsl#8