diff options
Diffstat (limited to 'main/openssl/crypto/bn/asm/armv4-gf2m.S')
m--------- | main/openssl | 0 | ||||
-rw-r--r-- | main/openssl/crypto/bn/asm/armv4-gf2m.S | 201 |
2 files changed, 0 insertions, 201 deletions
diff --git a/main/openssl b/main/openssl new file mode 160000 +Subproject 4d377a9ce111930d8a8f06dc0e94a892a7f6c51 diff --git a/main/openssl/crypto/bn/asm/armv4-gf2m.S b/main/openssl/crypto/bn/asm/armv4-gf2m.S deleted file mode 100644 index 0fa25b26..00000000 --- a/main/openssl/crypto/bn/asm/armv4-gf2m.S +++ /dev/null @@ -1,201 +0,0 @@ -#include "arm_arch.h" - -.text -.code 32 - -#if __ARM_ARCH__>=7 -.fpu neon -#endif -.type mul_1x1_ialu,%function -.align 5 -mul_1x1_ialu: - mov r4,#0 - bic r5,r1,#3<<30 @ a1=a&0x3fffffff - str r4,[sp,#0] @ tab[0]=0 - add r6,r5,r5 @ a2=a1<<1 - str r5,[sp,#4] @ tab[1]=a1 - eor r7,r5,r6 @ a1^a2 - str r6,[sp,#8] @ tab[2]=a2 - mov r8,r5,lsl#2 @ a4=a1<<2 - str r7,[sp,#12] @ tab[3]=a1^a2 - eor r9,r5,r8 @ a1^a4 - str r8,[sp,#16] @ tab[4]=a4 - eor r4,r6,r8 @ a2^a4 - str r9,[sp,#20] @ tab[5]=a1^a4 - eor r7,r7,r8 @ a1^a2^a4 - str r4,[sp,#24] @ tab[6]=a2^a4 - and r8,r12,r0,lsl#2 - str r7,[sp,#28] @ tab[7]=a1^a2^a4 - - and r9,r12,r0,lsr#1 - ldr r5,[sp,r8] @ tab[b & 0x7] - and r8,r12,r0,lsr#4 - ldr r7,[sp,r9] @ tab[b >> 3 & 0x7] - and r9,r12,r0,lsr#7 - ldr r6,[sp,r8] @ tab[b >> 6 & 0x7] - eor r5,r5,r7,lsl#3 @ stall - mov r4,r7,lsr#29 - ldr r7,[sp,r9] @ tab[b >> 9 & 0x7] - - and r8,r12,r0,lsr#10 - eor r5,r5,r6,lsl#6 - eor r4,r4,r6,lsr#26 - ldr r6,[sp,r8] @ tab[b >> 12 & 0x7] - - and r9,r12,r0,lsr#13 - eor r5,r5,r7,lsl#9 - eor r4,r4,r7,lsr#23 - ldr r7,[sp,r9] @ tab[b >> 15 & 0x7] - - and r8,r12,r0,lsr#16 - eor r5,r5,r6,lsl#12 - eor r4,r4,r6,lsr#20 - ldr r6,[sp,r8] @ tab[b >> 18 & 0x7] - - and r9,r12,r0,lsr#19 - eor r5,r5,r7,lsl#15 - eor r4,r4,r7,lsr#17 - ldr r7,[sp,r9] @ tab[b >> 21 & 0x7] - - and r8,r12,r0,lsr#22 - eor r5,r5,r6,lsl#18 - eor r4,r4,r6,lsr#14 - ldr r6,[sp,r8] @ tab[b >> 24 & 0x7] - - and r9,r12,r0,lsr#25 - eor r5,r5,r7,lsl#21 - eor r4,r4,r7,lsr#11 - ldr r7,[sp,r9] @ tab[b >> 27 & 0x7] - - tst r1,#1<<30 - and r8,r12,r0,lsr#28 - eor r5,r5,r6,lsl#24 - eor r4,r4,r6,lsr#8 - ldr r6,[sp,r8] @ tab[b >> 30 ] - - eorne r5,r5,r0,lsl#30 - eorne r4,r4,r0,lsr#2 - tst r1,#1<<31 - eor r5,r5,r7,lsl#27 - eor r4,r4,r7,lsr#5 - eorne r5,r5,r0,lsl#31 - eorne r4,r4,r0,lsr#1 - eor r5,r5,r6,lsl#30 - eor r4,r4,r6,lsr#2 - - mov pc,lr -.size mul_1x1_ialu,.-mul_1x1_ialu -.global bn_GF2m_mul_2x2 -.type bn_GF2m_mul_2x2,%function -.align 5 -bn_GF2m_mul_2x2: -#if __ARM_ARCH__>=7 - ldr r12,.LOPENSSL_armcap -.Lpic: ldr r12,[pc,r12] - tst r12,#1 - beq .Lialu - - ldr r12, [sp] @ 5th argument - vmov.32 d26, r2, r1 - vmov.32 d27, r12, r3 - vmov.i64 d28, #0x0000ffffffffffff - vmov.i64 d29, #0x00000000ffffffff - vmov.i64 d30, #0x000000000000ffff - - vext.8 d2, d26, d26, #1 @ A1 - vmull.p8 q1, d2, d27 @ F = A1*B - vext.8 d0, d27, d27, #1 @ B1 - vmull.p8 q0, d26, d0 @ E = A*B1 - vext.8 d4, d26, d26, #2 @ A2 - vmull.p8 q2, d4, d27 @ H = A2*B - vext.8 d16, d27, d27, #2 @ B2 - vmull.p8 q8, d26, d16 @ G = A*B2 - vext.8 d6, d26, d26, #3 @ A3 - veor q1, q1, q0 @ L = E + F - vmull.p8 q3, d6, d27 @ J = A3*B - vext.8 d0, d27, d27, #3 @ B3 - veor q2, q2, q8 @ M = G + H - vmull.p8 q0, d26, d0 @ I = A*B3 - veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8 - vand d3, d3, d28 - vext.8 d16, d27, d27, #4 @ B4 - veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16 - vand d5, d5, d29 - vmull.p8 q8, d26, d16 @ K = A*B4 - veor q3, q3, q0 @ N = I + J - veor d2, d2, d3 - veor d4, d4, d5 - veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24 - vand d7, d7, d30 - vext.8 q1, q1, q1, #15 - veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32 - vmov.i64 d17, #0 - vext.8 q2, q2, q2, #14 - veor d6, d6, d7 - vmull.p8 q0, d26, d27 @ D = A*B - vext.8 q8, q8, q8, #12 - vext.8 q3, q3, q3, #13 - veor q1, q1, q2 - veor q3, q3, q8 - veor q0, q0, q1 - veor q0, q0, q3 - - vst1.32 {q0}, [r0] - bx lr @ bx lr -.align 4 -.Lialu: -#endif - stmdb sp!,{r4-r10,lr} - mov r10,r0 @ reassign 1st argument - mov r0,r3 @ r0=b1 - ldr r3,[sp,#32] @ load b0 - mov r12,#7<<2 - sub sp,sp,#32 @ allocate tab[8] - - bl mul_1x1_ialu @ a1·b1 - str r5,[r10,#8] - str r4,[r10,#12] - - eor r0,r0,r3 @ flip b0 and b1 - eor r1,r1,r2 @ flip a0 and a1 - eor r3,r3,r0 - eor r2,r2,r1 - eor r0,r0,r3 - eor r1,r1,r2 - bl mul_1x1_ialu @ a0·b0 - str r5,[r10] - str r4,[r10,#4] - - eor r1,r1,r2 - eor r0,r0,r3 - bl mul_1x1_ialu @ (a1+a0)·(b1+b0) - ldmia r10,{r6-r9} - eor r5,r5,r4 - eor r4,r4,r7 - eor r5,r5,r6 - eor r4,r4,r8 - eor r5,r5,r9 - eor r4,r4,r9 - str r4,[r10,#8] - eor r5,r5,r4 - add sp,sp,#32 @ destroy tab[8] - str r5,[r10,#4] - -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r10,pc} -#else - ldmia sp!,{r4-r10,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -#if __ARM_ARCH__>=7 -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-(.Lpic+8) -#endif -.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" -.align 5 - -.comm OPENSSL_armcap_P,4,4 |