summaryrefslogtreecommitdiff
path: root/app/openssl/crypto/sha/asm/sha1-armv4-large.s
diff options
context:
space:
mode:
Diffstat (limited to 'app/openssl/crypto/sha/asm/sha1-armv4-large.s')
-rw-r--r--app/openssl/crypto/sha/asm/sha1-armv4-large.s209
1 files changed, 137 insertions, 72 deletions
diff --git a/app/openssl/crypto/sha/asm/sha1-armv4-large.s b/app/openssl/crypto/sha/asm/sha1-armv4-large.s
index 7f687d9f..639ae78a 100644
--- a/app/openssl/crypto/sha/asm/sha1-armv4-large.s
+++ b/app/openssl/crypto/sha/asm/sha1-armv4-large.s
@@ -1,3 +1,5 @@
+#include "arm_arch.h"
+
.text
.global sha1_block_data_order
@@ -16,76 +18,126 @@ sha1_block_data_order:
mov r6,r6,ror#30
mov r7,r7,ror#30 @ [6]
.L_00_15:
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r5,r6 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r4,r10,ror#2
add r7,r7,r9 @ E+=X[i]
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r7,r7,r10 @ E+=F_00_19(B,C,D)
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r6,r8,r6,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r4,r5 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r6,r8,r6,ror#2 @ E+=K_00_19
eor r10,r4,r5 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r3,r10,ror#2
add r6,r6,r9 @ E+=X[i]
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r6,r6,r10 @ E+=F_00_19(B,C,D)
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r5,r8,r5,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r3,r4 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r5,r8,r5,ror#2 @ E+=K_00_19
eor r10,r3,r4 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r7,r10,ror#2
add r5,r5,r9 @ E+=X[i]
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r5,r5,r10 @ E+=F_00_19(B,C,D)
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r4,r8,r4,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r7,r3 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r4,r8,r4,ror#2 @ E+=K_00_19
eor r10,r7,r3 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r6,r10,ror#2
add r4,r4,r9 @ E+=X[i]
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r4,r4,r10 @ E+=F_00_19(B,C,D)
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r3,r8,r3,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r6,r7 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r3,r8,r3,ror#2 @ E+=K_00_19
eor r10,r6,r7 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r5,r10,ror#2
add r3,r3,r9 @ E+=X[i]
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
@@ -93,17 +145,27 @@ sha1_block_data_order:
add r3,r3,r10 @ E+=F_00_19(B,C,D)
teq r14,sp
bne .L_00_15 @ [((11+4)*5+2)*3]
- sub sp,sp,#5*4
- ldrb r9,[r1],#4
- ldrb r10,[r1,#-1]
- ldrb r11,[r1,#-2]
+ sub sp,sp,#25*4
+#if __ARM_ARCH__<7
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1,#-3]
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r5,r6 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r10,r9,lsl#24
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r7,r8,r7,ror#2 @ E+=K_00_19
eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#8
- orr r9,r9,r12,lsl#16
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
and r10,r4,r10,ror#2
add r7,r7,r9 @ E+=X[i]
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
@@ -115,15 +177,15 @@ sha1_block_data_order:
add r6,r8,r6,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
mov r9,r9,ror#31
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r3,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r6,r6,r9 @ E+=X[i]
- str r9,[r14,#-4]!
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
add r6,r6,r10 @ E+=F_00_19(B,C,D)
ldr r9,[r14,#15*4]
@@ -132,15 +194,15 @@ sha1_block_data_order:
add r5,r8,r5,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
mov r9,r9,ror#31
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r7,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r5,r5,r9 @ E+=X[i]
- str r9,[r14,#-4]!
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
add r5,r5,r10 @ E+=F_00_19(B,C,D)
ldr r9,[r14,#15*4]
@@ -149,15 +211,15 @@ sha1_block_data_order:
add r4,r8,r4,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
mov r9,r9,ror#31
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r6,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r4,r4,r9 @ E+=X[i]
- str r9,[r14,#-4]!
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
add r4,r4,r10 @ E+=F_00_19(B,C,D)
ldr r9,[r14,#15*4]
@@ -166,20 +228,19 @@ sha1_block_data_order:
add r3,r8,r3,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
mov r9,r9,ror#31
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r5,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r3,r3,r9 @ E+=X[i]
- str r9,[r14,#-4]!
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
add r3,r3,r10 @ E+=F_00_19(B,C,D)
ldr r8,.LK_20_39 @ [+15+16*4]
- sub sp,sp,#20*4
cmn sp,#0 @ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
ldr r9,[r14,#15*4]
@@ -188,15 +249,15 @@ sha1_block_data_order:
add r7,r8,r7,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r5,r6 @ F_xx_xx
mov r9,r9,ror#31
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
eor r10,r4,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r7,r7,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r7,r7,r10 @ E+=F_20_39(B,C,D)
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
@@ -204,15 +265,15 @@ sha1_block_data_order:
add r6,r8,r6,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
mov r9,r9,ror#31
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
eor r10,r3,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r6,r6,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r6,r6,r10 @ E+=F_20_39(B,C,D)
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
@@ -220,15 +281,15 @@ sha1_block_data_order:
add r5,r8,r5,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
mov r9,r9,ror#31
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
eor r10,r7,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r5,r5,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r5,r5,r10 @ E+=F_20_39(B,C,D)
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
@@ -236,15 +297,15 @@ sha1_block_data_order:
add r4,r8,r4,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
mov r9,r9,ror#31
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
eor r10,r6,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r4,r4,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r4,r4,r10 @ E+=F_20_39(B,C,D)
ldr r9,[r14,#15*4]
ldr r10,[r14,#13*4]
@@ -252,15 +313,15 @@ sha1_block_data_order:
add r3,r8,r3,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
mov r9,r9,ror#31
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
eor r10,r5,r10,ror#2 @ F_xx_xx
@ F_xx_xx
add r3,r3,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r3,r3,r10 @ E+=F_20_39(B,C,D)
teq r14,sp @ preserve carry
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
@@ -275,15 +336,15 @@ sha1_block_data_order:
add r7,r8,r7,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r5,r6 @ F_xx_xx
mov r9,r9,ror#31
add r7,r7,r3,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r4,r10,ror#2 @ F_xx_xx
and r11,r5,r6 @ F_xx_xx
add r7,r7,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r7,r7,r10 @ E+=F_40_59(B,C,D)
add r7,r7,r11,ror#2
ldr r9,[r14,#15*4]
@@ -292,15 +353,15 @@ sha1_block_data_order:
add r6,r8,r6,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r4,r5 @ F_xx_xx
mov r9,r9,ror#31
add r6,r6,r7,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r3,r10,ror#2 @ F_xx_xx
and r11,r4,r5 @ F_xx_xx
add r6,r6,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r6,r6,r10 @ E+=F_40_59(B,C,D)
add r6,r6,r11,ror#2
ldr r9,[r14,#15*4]
@@ -309,15 +370,15 @@ sha1_block_data_order:
add r5,r8,r5,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r3,r4 @ F_xx_xx
mov r9,r9,ror#31
add r5,r5,r6,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r7,r10,ror#2 @ F_xx_xx
and r11,r3,r4 @ F_xx_xx
add r5,r5,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r5,r5,r10 @ E+=F_40_59(B,C,D)
add r5,r5,r11,ror#2
ldr r9,[r14,#15*4]
@@ -326,15 +387,15 @@ sha1_block_data_order:
add r4,r8,r4,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r7,r3 @ F_xx_xx
mov r9,r9,ror#31
add r4,r4,r5,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r6,r10,ror#2 @ F_xx_xx
and r11,r7,r3 @ F_xx_xx
add r4,r4,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r4,r4,r10 @ E+=F_40_59(B,C,D)
add r4,r4,r11,ror#2
ldr r9,[r14,#15*4]
@@ -343,15 +404,15 @@ sha1_block_data_order:
add r3,r8,r3,ror#2 @ E+=K_xx_xx
ldr r12,[r14,#2*4]
eor r9,r9,r10
- eor r11,r11,r12
+ eor r11,r11,r12 @ 1 cycle stall
eor r10,r6,r7 @ F_xx_xx
mov r9,r9,ror#31
add r3,r3,r4,ror#27 @ E+=ROR(A,27)
eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
and r10,r5,r10,ror#2 @ F_xx_xx
and r11,r6,r7 @ F_xx_xx
add r3,r3,r9 @ E+=X[i]
- str r9,[r14,#-4]!
add r3,r3,r10 @ E+=F_40_59(B,C,D)
add r3,r3,r11,ror#2
teq r14,sp
@@ -373,10 +434,14 @@ sha1_block_data_order:
teq r1,r2
bne .Lloop @ [+18], total 1307
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
ldmia sp!,{r4-r12,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
.align 2
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1